16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**************************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 2002-2013 International Business Machines Corporation  *
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   and others. All rights reserved.                                     *
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**************************************************************************
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  file:  rematch.cpp
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//         Contains the implementation of class RegexMatcher,
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//         which is one of the main API classes for the ICU regular expression package.
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_REGULAR_EXPRESSIONS
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/regex.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/rbbi.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h"
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h"
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvectr32.h"
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvectr64.h"
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regeximp.h"
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regexst.h"
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regextxt.h"
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucase.h"
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// #include <malloc.h>        // Needed for heapcheck testing
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Find progress callback
// ----------------------
// REGEXFINDPROGRESS_INTERRUPT(pos, status) expands to an expression that is true
// when a find-progress callback is installed (fFindProgressCallbackFn != NULL)
// and ReportFindProgress(pos, status) returns FALSE.  Testing the callback
// pointer inline avoids the function call when no callback is set.
//
// The whole expansion and both arguments are parenthesized, so the macro can be
// combined with other operators (such as ! or ||) at the point of use without
// changing its meaning.
//
#define REGEXFINDPROGRESS_INTERRUPT(pos, status)     \
    ((fFindProgressCallbackFn != NULL) && (ReportFindProgress((pos), (status)) == FALSE))
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Smart Backtracking
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// ------------------
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// When a failure would go back to a LOOP_C instruction,
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// strings, characters, and setrefs scan backwards for a valid start
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// character themselves, pop the stack, and save state, emulating the
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// LOOP_C's effect but assured that the next character of input is a
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// possible matching character.
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Good idea in theory; unfortunately it only helps out a few specific
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// cases and slows the engine down a little in the rest.
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Default limit for the size of the backtrack stack, to avoid system
//    failures caused by heap exhaustion.  Units are in 32 bit words, not bytes.
// This value puts ICU's limits higher than most other regexp implementations,
//    which use recursion rather than the heap, and take more storage per
//    backtrack point.
//
static const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8 * 1000 * 1000;
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Time limit counter constant.
//   Time limits for expression evaluation are expressed in quanta of work by
//   the engine, each of which is 10,000 state saves.
//   This constant sets the number of state saves per tick.
static const int32_t TIMER_INITIAL_VALUE = 10 * 1000;
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-----------------------------------------------------------------------------
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   Constructor and Destructor
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-----------------------------------------------------------------------------
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(const RegexPattern *pat)  {
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fDeferredStatus = U_ZERO_ERROR;
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init(fDeferredStatus);
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (pat==NULL) {
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR;
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPattern = pat;
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init2(RegexStaticSets::gStaticSets->fEmptyText, fDeferredStatus);
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           uint32_t flags, UErrorCode &status) {
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init(status);
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError    pe;
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPattern           = fPatternOwned;
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText inputText = UTEXT_INITIALIZER;
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openConstUnicodeString(&inputText, &input, &status);
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init2(&inputText, status);
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&inputText);
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputUniStrMaybeMutable = TRUE;
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(UText *regexp, UText *input,
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           uint32_t flags, UErrorCode &status) {
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init(status);
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError    pe;
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPattern           = fPatternOwned;
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init2(input, status);
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(const UnicodeString &regexp,
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           uint32_t flags, UErrorCode &status) {
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init(status);
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError    pe;
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPattern           = fPatternOwned;
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init2(RegexStaticSets::gStaticSets->fEmptyText, status);
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(UText *regexp,
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           uint32_t flags, UErrorCode &status) {
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init(status);
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError    pe;
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPattern           = fPatternOwned;
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init2(RegexStaticSets::gStaticSets->fEmptyText, status);
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::~RegexMatcher() {
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fStack;
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fData != fSmallData) {
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_free(fData);
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fData = NULL;
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fPatternOwned) {
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete fPatternOwned;
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fPatternOwned = NULL;
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fPattern = NULL;
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fInput) {
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete fInput;
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fInputText) {
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(fInputText);
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fAltInputText) {
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(fAltInputText);
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    #if UCONFIG_NO_BREAK_ITERATION==0
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fWordBreakItr;
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    #endif
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   init()   common initialization for use by all constructors.
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//            Initialize all fields, get the object into a consistent state.
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//            This must be done even when the initial status shows an error,
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//            so that the object is initialized sufficiently well for the destructor
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//            to run safely.
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::init(UErrorCode &status) {
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPattern           = NULL;
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPatternOwned      = NULL;
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFrameSize         = 0;
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRegionStart       = 0;
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRegionLimit       = 0;
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAnchorStart       = 0;
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAnchorLimit       = 0;
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLookStart         = 0;
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLookLimit         = 0;
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fActiveStart       = 0;
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fActiveLimit       = 0;
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTransparentBounds = FALSE;
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAnchoringBounds   = TRUE;
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatch             = FALSE;
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchStart        = 0;
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchEnd          = 0;
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLastMatchEnd      = -1;
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAppendPosition    = 0;
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fHitEnd            = FALSE;
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRequireEnd        = FALSE;
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStack             = NULL;
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFrame             = NULL;
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTimeLimit         = 0;
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTime              = 0;
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTickCounter       = 0;
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStackLimit        = DEFAULT_BACKTRACK_STACK_CAPACITY;
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fCallbackFn        = NULL;
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fCallbackContext   = NULL;
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFindProgressCallbackFn      = NULL;
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFindProgressCallbackContext = NULL;
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTraceDebug        = FALSE;
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fDeferredStatus    = status;
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fData              = fSmallData;
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fWordBreakItr      = NULL;
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStack             = NULL;
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputText         = NULL;
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAltInputText      = NULL;
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInput             = NULL;
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputLength       = 0;
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputUniStrMaybeMutable = FALSE;
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fDeferredStatus = status;
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  init2()   Common initialization for use by RegexMatcher constructors, part 2.
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//            This handles the common setup to be done after the Pattern is available.
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::init2(UText *input, UErrorCode &status) {
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fDeferredStatus = status;
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(fSmallData[0]))) {
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fData = (int64_t *)uprv_malloc(fPattern->fDataSize * sizeof(int64_t));
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fData == NULL) {
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStack = new UVector64(status);
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fStack == NULL) {
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset(input);
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status);
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fDeferredStatus = status;
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar BACKSLASH  = 0x5c;
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar DOLLARSIGN = 0x24;
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    appendReplacement
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest,
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                              const UnicodeString &replacement,
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                              UErrorCode &status) {
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText replacementText = UTEXT_INITIALIZER;
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openConstUnicodeString(&replacementText, &replacement, &status);
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(status)) {
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText resultText = UTEXT_INITIALIZER;
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUnicodeString(&resultText, &dest, &status);
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_SUCCESS(status)) {
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            appendReplacement(&resultText, &replacementText, status);
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_close(&resultText);
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&replacementText);
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    appendReplacement, UText mode
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::appendReplacement(UText *dest,
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                              UText *replacement,
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                              UErrorCode &status) {
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatch == FALSE) {
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_REGEX_INVALID_STATE;
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Copy input string from the end of previous match to start of current match
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t  destLen = utext_nativeLength(dest);
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatchStart > fAppendPosition) {
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            destLen += utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition,
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                     (int32_t)(fMatchStart-fAppendPosition), &status);
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t len16;
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (UTEXT_USES_U16(fInputText)) {
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                len16 = (int32_t)(fMatchStart-fAppendPosition);
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UErrorCode lengthStatus = U_ZERO_ERROR;
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                len16 = utext_extract(fInputText, fAppendPosition, fMatchStart, NULL, 0, &lengthStatus);
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (inputChars == NULL) {
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                status = U_MEMORY_ALLOCATION_ERROR;
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return *this;
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_extract(fInputText, fAppendPosition, fMatchStart, inputChars, len16+1, &status);
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            destLen += utext_replace(dest, destLen, destLen, inputChars, len16, &status);
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uprv_free(inputChars);
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAppendPosition = fMatchEnd;
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // scan the replacement text, looking for substitutions ($n) and \escapes.
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  TODO:  optimize this loop by efficiently scanning for '$' or '\',
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //         move entire ranges not containing substitutions.
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UTEXT_SETNATIVEINDEX(replacement, 0);
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c = UTEXT_NEXT32(replacement);
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (c != U_SENTINEL) {
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c == BACKSLASH) {
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Backslash Escape.  Copy the following char out without further checks.
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //                    Note:  Surrogate pairs don't need any special handling
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //                           The second half wont be a '$' or a '\', and
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //                           will move to the dest normally on the next
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //                           loop iteration.
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = UTEXT_CURRENT32(replacement);
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c == U_SENTINEL) {
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c==0x55/*U*/ || c==0x75/*u*/) {
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // We have a \udddd or \Udddddddd escape sequence.
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t offset = 0;
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(replacement);
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 escapedChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context);
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (escapedChar != (UChar32)0xFFFFFFFF) {
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (U_IS_BMP(escapedChar)) {
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UChar c16 = (UChar)escapedChar;
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UChar surrogate[2];
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        surrogate[0] = U16_LEAD(escapedChar);
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        surrogate[1] = U16_TRAIL(escapedChar);
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (U_SUCCESS(status)) {
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // TODO:  Report errors for mal-formed \u escapes?
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //        As this is, the original sequence is output, which may be OK.
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (context.lastOffset == offset) {
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (void)UTEXT_PREVIOUS32(replacement);
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else if (context.lastOffset != offset-1) {
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_moveIndex32(replacement, offset - context.lastOffset - 1);
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (void)UTEXT_NEXT32(replacement);
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Plain backslash escape.  Just put out the escaped character.
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (U_IS_BMP(c)) {
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar c16 = (UChar)c;
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar surrogate[2];
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    surrogate[0] = U16_LEAD(c);
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    surrogate[1] = U16_TRAIL(c);
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (U_SUCCESS(status)) {
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if (c != DOLLARSIGN) {
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Normal char, not a $.  Copy it out without further checks.
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_IS_BMP(c)) {
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar c16 = (UChar)c;
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar surrogate[2];
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                surrogate[0] = U16_LEAD(c);
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                surrogate[1] = U16_TRAIL(c);
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (U_SUCCESS(status)) {
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // We've got a $.  Pick up a capture group number if one follows.
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Consume at most the number of digits necessary for the largest capture
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // number that is valid for this pattern.
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t numDigits = 0;
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t groupNum  = 0;
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 digitC;
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (;;) {
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                digitC = UTEXT_CURRENT32(replacement);
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (digitC == U_SENTINEL) {
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (u_isdigit(digitC) == FALSE) {
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (void)UTEXT_NEXT32(replacement);
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                groupNum=groupNum*10 + u_charDigitValue(digitC);
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numDigits++;
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (numDigits >= fPattern->fMaxCaptureDigits) {
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (numDigits == 0) {
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // The $ didn't introduce a group number at all.
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Treat it as just part of the substitution text.
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar c16 = DOLLARSIGN;
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Finally, append the capture group data to the destination.
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                destLen += appendGroup(groupNum, dest, status);
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (U_FAILURE(status)) {
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Can fail if group number is out of range.
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = UTEXT_NEXT32(replacement);
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    appendTail     Intended to be used in conjunction with appendReplacement()
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                   To the destination string, append everything following
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                   the last match position from the input string.
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                   Note:  Match ranges do not affect appendTail or appendReplacement
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString &RegexMatcher::appendTail(UnicodeString &dest) {
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText resultText = UTEXT_INITIALIZER;
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUnicodeString(&resultText, &dest, &status);
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(status)) {
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        appendTail(&resultText, status);
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&resultText);
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   appendTail, UText mode
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::appendTail(UText *dest, UErrorCode &status) {
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool bailOut = FALSE;
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (bailOut) {
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  dest must not be NULL
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (dest) {
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, utext_nativeLength(dest), utext_nativeLength(dest), NULL, 0, &status);
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return dest;
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fInputLength > fAppendPosition) {
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int64_t destLen = utext_nativeLength(dest);
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition,
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                          (int32_t)(fInputLength-fAppendPosition), &status);
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t len16;
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (UTEXT_USES_U16(fInputText)) {
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                len16 = (int32_t)(fInputLength-fAppendPosition);
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                len16 = utext_extract(fInputText, fAppendPosition, fInputLength, NULL, 0, &status);
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                status = U_ZERO_ERROR; // buffer overflow
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16));
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (inputChars == NULL) {
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                utext_extract(fInputText, fAppendPosition, fInputLength, inputChars, len16, &status); // unterminated
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t destLen = utext_nativeLength(dest);
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                utext_replace(dest, destLen, destLen, inputChars, len16, &status);
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uprv_free(inputChars);
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   end
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::end(UErrorCode &err) const {
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return end(0, err);
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::end64(UErrorCode &err) const {
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return end64(0, err);
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::end64(int32_t group, UErrorCode &err) const {
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(err)) {
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -1;
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatch == FALSE) {
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        err = U_REGEX_INVALID_STATE;
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -1;
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (group < 0 || group > fPattern->fGroupMap->size()) {
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        err = U_INDEX_OUTOFBOUNDS_ERROR;
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -1;
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t e = -1;
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (group == 0) {
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = fMatchEnd;
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Get the position within the stack frame of the variables for
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    this capture group.
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1);
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset < fPattern->fFrameSize);
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset >= 0);
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = fFrame->fExtra[groupOffset + 1];
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return e;
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::end(int32_t group, UErrorCode &err) const {
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)end64(group, err);
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   find()
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::find() {
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Start at the position of the last match end.  (Will be zero if the
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   matcher has been reset.)
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return findUsingChunk();
6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t startPos = fMatchEnd;
6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (startPos==0) {
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        startPos = fActiveStart;
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatch) {
6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Save the position of any previous successful match.
6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLastMatchEnd = fMatchEnd;
6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fMatchStart == fMatchEnd) {
6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Previous match had zero length.  Move start position up one position
6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  to avoid sending find() into a loop on zero-length matches.
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (startPos >= fActiveLimit) {
6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fMatch = FALSE;
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTEXT_SETNATIVEINDEX(fInputText, startPos);
6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (void)UTEXT_NEXT32(fInputText);
6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            startPos = UTEXT_GETNATIVEINDEX(fInputText);
6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fLastMatchEnd >= 0) {
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // A previous find() failed to match.  Don't try again.
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   (without this test, a pattern with a zero-length match
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    could match again at the end of an input string.)
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fHitEnd = TRUE;
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compute the position in the input string beyond which a match can not begin, because
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   the minimum length match would extend past the end of the input.
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Note:  some patterns that cannot match anything will have fMinMatchLength==Max Int.
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //          Be aware of possible overflows if making changes here.
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t testStartLimit;
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTEXT_USES_U16(fInputText)) {
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        testStartLimit = fActiveLimit - fPattern->fMinMatchLen;
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (startPos > testStartLimit) {
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fMatch = FALSE;
6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fHitEnd = TRUE;
6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // For now, let the matcher discover that it can't match on its own
6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We don't know how long the match len is in native characters
6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        testStartLimit = fActiveLimit;
6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32  c;
6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(startPos >= 0);
6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch (fPattern->fStartType) {
6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_NO_INFO:
6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // No optimization was found.
6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Try a match at each input position.
6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (;;) {
6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            MatchAt(startPos, FALSE, fDeferredStatus);
6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_FAILURE(fDeferredStatus)) {
6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fMatch) {
6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return TRUE;
6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (startPos >= testStartLimit) {
6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTEXT_SETNATIVEINDEX(fInputText, startPos);
6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (void)UTEXT_NEXT32(fInputText);
6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            startPos = UTEXT_GETNATIVEINDEX(fInputText);
6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Note that it's perfectly OK for a pattern to have a zero-length
6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   match at the end of a string, so we must make sure that the loop
6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   runs with startPos == testStartLimit the last time through.
6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_START:
6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Matches are only possible at the start of the input string
6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   (pattern begins with ^ or \A)
6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (startPos > fActiveStart) {
6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fMatch = FALSE;
6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchAt(startPos, FALSE, fDeferredStatus);
6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(fDeferredStatus)) {
6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return fMatch;
6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_SET:
7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Match may start on any char from a pre-computed set.
7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(fPattern->fMinMatchLen > 0);
7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int64_t pos;
7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTEXT_SETNATIVEINDEX(fInputText, startPos);
7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (;;) {
7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_NEXT32(fInputText);
7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                pos = UTEXT_GETNATIVEINDEX(fInputText);
7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // c will be -1 (U_SENTINEL) at end of text, in which case we
7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // skip this next block (so we don't have a negative array index)
7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // and handle end of text in the following block.
7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) ||
7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              (c>=256 && fPattern->fInitialChars->contains(c)))) {
7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    MatchAt(startPos, FALSE, fDeferredStatus);
7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (U_FAILURE(fDeferredStatus)) {
7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (fMatch) {
7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return TRUE;
7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UTEXT_SETNATIVEINDEX(fInputText, pos);
7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (startPos >= testStartLimit) {
7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fMatch = FALSE;
7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                startPos = pos;
7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_STRING:
7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_CHAR:
7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Match starts on exactly one char.
7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(fPattern->fMinMatchLen > 0);
7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 theChar = fPattern->fInitialChar;
7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int64_t pos;
7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTEXT_SETNATIVEINDEX(fInputText, startPos);
7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (;;) {
7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_NEXT32(fInputText);
7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                pos = UTEXT_GETNATIVEINDEX(fInputText);
7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c == theChar) {
7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    MatchAt(startPos, FALSE, fDeferredStatus);
7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (U_FAILURE(fDeferredStatus)) {
7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (fMatch) {
7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return TRUE;
7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UTEXT_SETNATIVEINDEX(fInputText, pos);
7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (startPos >= testStartLimit) {
7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fMatch = FALSE;
7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                startPos = pos;
7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           }
7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_LINE:
7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32  c;
7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (startPos == fAnchorStart) {
7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                MatchAt(startPos, FALSE, fDeferredStatus);
7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (U_FAILURE(fDeferredStatus)) {
7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fMatch) {
7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return TRUE;
7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, startPos);
7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_NEXT32(fInputText);
7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                startPos = UTEXT_GETNATIVEINDEX(fInputText);
7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, startPos);
7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_PREVIOUS32(fInputText);
7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, startPos);
7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fPattern->fFlags & UREGEX_UNIX_LINES) {
7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (c == 0x0a) {
7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            MatchAt(startPos, FALSE, fDeferredStatus);
7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if (U_FAILURE(fDeferredStatus)) {
7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                return FALSE;
7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if (fMatch) {
7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                return TRUE;
7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UTEXT_SETNATIVEINDEX(fInputText, startPos);
8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (startPos >= testStartLimit) {
8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fMatch = FALSE;
8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c = UTEXT_NEXT32(fInputText);
8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    startPos = UTEXT_GETNATIVEINDEX(fInputText);
8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Note that it's perfectly OK for a pattern to have a zero-length
8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   match at the end of a string, so we must make sure that the loop
8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   runs with startPos == testStartLimit the last time through.
8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org		            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) {
8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                (void)UTEXT_NEXT32(fInputText);
8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                startPos = UTEXT_GETNATIVEINDEX(fInputText);
8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            MatchAt(startPos, FALSE, fDeferredStatus);
8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if (U_FAILURE(fDeferredStatus)) {
8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                return FALSE;
8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if (fMatch) {
8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                return TRUE;
8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UTEXT_SETNATIVEINDEX(fInputText, startPos);
8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (startPos >= testStartLimit) {
8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fMatch = FALSE;
8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c = UTEXT_NEXT32(fInputText);
8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    startPos = UTEXT_GETNATIVEINDEX(fInputText);
8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Note that it's perfectly OK for a pattern to have a zero-length
8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   match at the end of a string, so we must make sure that the loop
8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   runs with startPos == testStartLimit the last time through.
8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org		            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    default:
8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(FALSE);
8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return FALSE;
8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::find(int64_t start, UErrorCode &status) {
8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    this->reset();                        // Note:  Reset() is specified by Java Matcher documentation.
8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                          //        This will reset the region to be the full input length.
8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (start < 0) {
8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t nativeStart = start;
8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchEnd = nativeStart;
8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return find();
8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   findUsingChunk() -- like find(), but with the advance knowledge that the
8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       entire string is available in the UText's chunk buffer.
8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::findUsingChunk() {
8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Start at the position of the last match end.  (Will be zero if the
8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   matcher has been reset.
8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t startPos = (int32_t)fMatchEnd;
8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (startPos==0) {
8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        startPos = (int32_t)fActiveStart;
8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *inputBuf = fInputText->chunkContents;
8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatch) {
9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Save the position of any previous successful match.
9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLastMatchEnd = fMatchEnd;
9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fMatchStart == fMatchEnd) {
9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Previous match had zero length.  Move start position up one position
9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  to avoid sending find() into a loop on zero-length matches.
9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (startPos >= fActiveLimit) {
9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fMatch = FALSE;
9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_FWD_1(inputBuf, startPos, fInputLength);
9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fLastMatchEnd >= 0) {
9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // A previous find() failed to match.  Don't try again.
9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   (without this test, a pattern with a zero-length match
9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    could match again at the end of an input string.)
9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fHitEnd = TRUE;
9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compute the position in the input string beyond which a match can not begin, because
9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   the minimum length match would extend past the end of the input.
9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Note:  some patterns that cannot match anything will have fMinMatchLength==Max Int.
9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //          Be aware of possible overflows if making changes here.
9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t testLen  = (int32_t)(fActiveLimit - fPattern->fMinMatchLen);
9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (startPos > testLen) {
9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fMatch = FALSE;
9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fHitEnd = TRUE;
9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32  c;
9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(startPos >= 0);
9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch (fPattern->fStartType) {
9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_NO_INFO:
9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // No optimization was found.
9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Try a match at each input position.
9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (;;) {
9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            MatchChunkAt(startPos, FALSE, fDeferredStatus);
9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_FAILURE(fDeferredStatus)) {
9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fMatch) {
9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return TRUE;
9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (startPos >= testLen) {
9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_FWD_1(inputBuf, startPos, fActiveLimit);
9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Note that it's perfectly OK for a pattern to have a zero-length
9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   match at the end of a string, so we must make sure that the loop
9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   runs with startPos == testLen the last time through.
9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_START:
9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Matches are only possible at the start of the input string
9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   (pattern begins with ^ or \A)
9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (startPos > fActiveStart) {
9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fMatch = FALSE;
9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchChunkAt(startPos, FALSE, fDeferredStatus);
9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(fDeferredStatus)) {
9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return fMatch;
9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_SET:
9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match may start on any char from a pre-computed set.
9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fPattern->fMinMatchLen > 0);
9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (;;) {
9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t pos = startPos;
9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if ((c<256 && fPattern->fInitialChars8->contains(c)) ||
9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (c>=256 && fPattern->fInitialChars->contains(c))) {
9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                MatchChunkAt(pos, FALSE, fDeferredStatus);
9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (U_FAILURE(fDeferredStatus)) {
9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fMatch) {
9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return TRUE;
9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (pos >= testLen) {
9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fMatch = FALSE;
9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_STRING:
10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_CHAR:
10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match starts on exactly one char.
10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fPattern->fMinMatchLen > 0);
10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 theChar = fPattern->fInitialChar;
10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (;;) {
10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t pos = startPos;
10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c == theChar) {
10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                MatchChunkAt(pos, FALSE, fDeferredStatus);
10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (U_FAILURE(fDeferredStatus)) {
10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fMatch) {
10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return TRUE;
10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (pos >= testLen) {
10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fMatch = FALSE;
10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case START_LINE:
10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32  c;
10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (startPos == fAnchorStart) {
10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            MatchChunkAt(startPos, FALSE, fDeferredStatus);
10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_FAILURE(fDeferredStatus)) {
10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fMatch) {
10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return TRUE;
10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_FWD_1(inputBuf, startPos, fActiveLimit);
10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fPattern->fFlags & UREGEX_UNIX_LINES) {
10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (;;) {
10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = inputBuf[startPos-1];
10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c == 0x0a) {
10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    MatchChunkAt(startPos, FALSE, fDeferredStatus);
10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (U_FAILURE(fDeferredStatus)) {
10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (fMatch) {
10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return TRUE;
10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (startPos >= testLen) {
10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fMatch = FALSE;
10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_FWD_1(inputBuf, startPos, fActiveLimit);
10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Note that it's perfectly OK for a pattern to have a zero-length
10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   match at the end of a string, so we must make sure that the loop
10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   runs with startPos == testLen the last time through.
10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (;;) {
10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = inputBuf[startPos-1];
10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) {
10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        startPos++;
10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    MatchChunkAt(startPos, FALSE, fDeferredStatus);
10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (U_FAILURE(fDeferredStatus)) {
10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (fMatch) {
10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return TRUE;
10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (startPos >= testLen) {
10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fMatch = FALSE;
10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_FWD_1(inputBuf, startPos, fActiveLimit);
10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Note that it's perfectly OK for a pattern to have a zero-length
10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   match at the end of a string, so we must make sure that the loop
10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   runs with startPos == testLen the last time through.
10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    default:
11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(FALSE);
11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return FALSE;
11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  group()
11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString RegexMatcher::group(UErrorCode &status) const {
11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return group(0, status);
11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Return immutable shallow clone
11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::group(UText *dest, int64_t &group_len, UErrorCode &status) const {
11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return group(0, dest, group_len, status);
11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Return immutable shallow clone
11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const {
11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    group_len = 0;
11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool bailOut = FALSE;
11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatch == FALSE) {
11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_REGEX_INVALID_STATE;
11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (bailOut) {
11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (dest) ? dest : utext_openUChars(NULL, NULL, 0, &status);
11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t s, e;
11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (groupNum == 0) {
11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = fMatchStart;
11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = fMatchEnd;
11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset < fPattern->fFrameSize);
11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset >= 0);
11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = fFrame->fExtra[groupOffset];
11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = fFrame->fExtra[groupOffset+1];
11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (s < 0) {
11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A capture group wasn't part of the match
11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return utext_clone(dest, fInputText, FALSE, TRUE, &status);
11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(s <= e);
11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    group_len = e - s;
11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dest = utext_clone(dest, fInputText, FALSE, TRUE, &status);
11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (dest)
11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UTEXT_SETNATIVEINDEX(dest, s);
11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString result;
11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return result;
11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText resultText = UTEXT_INITIALIZER;
11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUnicodeString(&resultText, &result, &status);
11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    group(groupNum, &resultText, status);
11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&resultText);
11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return result;
11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Return deep (mutable) clone
11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//		Technology Preview (as an API), but note that the UnicodeString API is implemented
11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//		using this function.
11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const {
11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool bailOut = FALSE;
11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatch == FALSE) {
12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_REGEX_INVALID_STATE;
12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (bailOut) {
12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (dest) {
12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return dest;
12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return utext_openUChars(NULL, NULL, 0, &status);
12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t s, e;
12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (groupNum == 0) {
12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = fMatchStart;
12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = fMatchEnd;
12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset < fPattern->fFrameSize);
12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset >= 0);
12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = fFrame->fExtra[groupOffset];
12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = fFrame->fExtra[groupOffset+1];
12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (s < 0) {
12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A capture group wasn't part of the match
12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (dest) {
12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return dest;
12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return utext_openUChars(NULL, NULL, 0, &status);
12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(s <= e);
12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(e <= fInputLength);
12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (dest) {
12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents+s, (int32_t)(e-s), &status);
12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UText groupText = UTEXT_INITIALIZER;
12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_openUChars(&groupText, fInputText->chunkContents+s, e-s, &status);
12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status);
12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_close(&groupText);
12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t len16;
12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTEXT_USES_U16(fInputText)) {
12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            len16 = (int32_t)(e-s);
12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UErrorCode lengthStatus = U_ZERO_ERROR;
12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus);
12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (groupChars == NULL) {
12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_MEMORY_ALLOCATION_ERROR;
12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return dest;
12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_extract(fInputText, s, e, groupChars, len16+1, &status);
12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (dest) {
12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, 0, utext_nativeLength(dest), groupChars, len16, &status);
12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UText groupText = UTEXT_INITIALIZER;
12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_openUChars(&groupText, groupChars, len16, &status);
12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status);
12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_close(&groupText);
12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_free(groupChars);
12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  appendGroup() -- currently internal only, appends a group to a UText rather
12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                   than replacing its contents
12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const {
12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t destLen = utext_nativeLength(dest);
12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatch == FALSE) {
13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_REGEX_INVALID_STATE;
13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t s, e;
13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (groupNum == 0) {
13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = fMatchStart;
13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = fMatchEnd;
13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset < fPattern->fFrameSize);
13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset >= 0);
13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = fFrame->fExtra[groupOffset];
13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = fFrame->fExtra[groupOffset+1];
13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (s < 0) {
13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A capture group wasn't part of the match
13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(s <= e);
13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t deltaLen;
13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(e <= fInputLength);
13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        deltaLen = utext_replace(dest, destLen, destLen, fInputText->chunkContents+s, (int32_t)(e-s), &status);
13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t len16;
13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTEXT_USES_U16(fInputText)) {
13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            len16 = (int32_t)(e-s);
13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UErrorCode lengthStatus = U_ZERO_ERROR;
13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus);
13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (groupChars == NULL) {
13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_MEMORY_ALLOCATION_ERROR;
13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_extract(fInputText, s, e, groupChars, len16+1, &status);
13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        deltaLen = utext_replace(dest, destLen, destLen, groupChars, len16, &status);
13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_free(groupChars);
13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return deltaLen;
13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  groupCount()
13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::groupCount() const {
13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fPattern->fGroupMap->size();
13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  hasAnchoringBounds()
13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::hasAnchoringBounds() const {
13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fAnchoringBounds;
13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  hasTransparentBounds()
13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::hasTransparentBounds() const {
13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fTransparentBounds;
13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  hitEnd()
13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::hitEnd() const {
13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fHitEnd;
13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  input()
13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UnicodeString &RegexMatcher::input() const {
14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!fInput) {
14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t len16;
14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTEXT_USES_U16(fInputText)) {
14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            len16 = (int32_t)fInputLength;
14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            len16 = utext_extract(fInputText, 0, fInputLength, NULL, 0, &status);
14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR; // overflow, length status
14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString *result = new UnicodeString(len16, 0, 0);
14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar *inputChars = result->getBuffer(len16);
14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_extract(fInputText, 0, fInputLength, inputChars, len16, &status); // unterminated warning
14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result->releaseBuffer(len16);
14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (*(const UnicodeString **)&fInput) = result; // pointer assignment, rather than operator=
14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *fInput;
14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
14246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
14256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  inputText()
14266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
14276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
14286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::inputText() const {
14296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fInputText;
14306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
14346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
14356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  getInput() -- like inputText(), but makes a clone or copies into another UText
14366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
14376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
14386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::getInput (UText *dest, UErrorCode &status) const {
14396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool bailOut = FALSE;
14406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
14416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
14426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
14446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
14456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bailOut = TRUE;
14466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (bailOut) {
14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (dest) {
14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return dest;
14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return utext_clone(NULL, fInputText, FALSE, TRUE, &status);
14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (dest) {
14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents, (int32_t)fInputLength, &status);
14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t input16Len;
14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (UTEXT_USES_U16(fInputText)) {
14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                input16Len = (int32_t)fInputLength;
14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UErrorCode lengthStatus = U_ZERO_ERROR;
14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                input16Len = utext_extract(fInputText, 0, fInputLength, NULL, 0, &lengthStatus); // buffer overflow error
14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(input16Len));
14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (inputChars == NULL) {
14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return dest;
14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_extract(fInputText, 0, fInputLength, inputChars, input16Len, &status); // not terminated warning
14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR;
14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utext_replace(dest, 0, utext_nativeLength(dest), inputChars, input16Len, &status);
14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uprv_free(inputChars);
14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return utext_clone(NULL, fInputText, FALSE, TRUE, &status);
14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool compat_SyncMutableUTextContents(UText *ut);
14886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool compat_SyncMutableUTextContents(UText *ut) {
14896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool retVal = FALSE;
14906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  In the following test, we're really only interested in whether the UText should switch
14926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  between heap and stack allocation.  If length hasn't changed, we won't, so the chunkContents
14936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  will still point to the correct data.
14946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (utext_nativeLength(ut) != ut->nativeIndexingLimit) {
14956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString *us=(UnicodeString *)ut->context;
14966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Update to the latest length.
14986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // For example, (utext_nativeLength(ut) != ut->nativeIndexingLimit).
14996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t newLength = us->length();
15006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Update the chunk description.
15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The buffer may have switched between stack- and heap-based.
15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ut->chunkContents    = us->getBuffer();
15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ut->chunkLength      = newLength;
15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ut->chunkNativeLimit = newLength;
15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ut->nativeIndexingLimit = newLength;
15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        retVal = TRUE;
15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return retVal;
15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  lookingAt()
15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
15176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
15186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::lookingAt(UErrorCode &status) {
15196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
15206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
15216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
15236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
15246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
15256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fInputUniStrMaybeMutable) {
15286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (compat_SyncMutableUTextContents(fInputText)) {
15296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInputLength = utext_nativeLength(fInputText);
15306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        reset();
15316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else {
15346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        resetPreserveRegion();
15356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
15376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchChunkAt((int32_t)fActiveStart, FALSE, status);
15386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
15396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchAt(fActiveStart, FALSE, status);
15406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fMatch;
15426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
15436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::lookingAt(int64_t start, UErrorCode &status) {
15466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
15476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
15486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
15506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
15516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
15526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
15546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (start < 0) {
15566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
15576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
15586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fInputUniStrMaybeMutable) {
15616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (compat_SyncMutableUTextContents(fInputText)) {
15626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInputLength = utext_nativeLength(fInputText);
15636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        reset();
15646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t nativeStart;
15686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nativeStart = start;
15696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
15706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
15716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
15726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
15756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchChunkAt((int32_t)nativeStart, FALSE, status);
15766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
15776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchAt(nativeStart, FALSE, status);
15786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fMatch;
15806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
15816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
15856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
15866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  matches()
15876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
15886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
15896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::matches(UErrorCode &status) {
15906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
15916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
15926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
15946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
15956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
15966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fInputUniStrMaybeMutable) {
15996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (compat_SyncMutableUTextContents(fInputText)) {
16006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInputLength = utext_nativeLength(fInputText);
16016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        reset();
16026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else {
16056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        resetPreserveRegion();
16066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
16096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchChunkAt((int32_t)fActiveStart, TRUE, status);
16106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
16116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchAt(fActiveStart, TRUE, status);
16126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fMatch;
16146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
16156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::matches(int64_t start, UErrorCode &status) {
16186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
16196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
16206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
16226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
16236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
16246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
16266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (start < 0) {
16286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
16296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
16306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fInputUniStrMaybeMutable) {
16336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (compat_SyncMutableUTextContents(fInputText)) {
16346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInputLength = utext_nativeLength(fInputText);
16356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        reset();
16366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t nativeStart;
16406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nativeStart = start;
16416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
16426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
16436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
16446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
16476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchChunkAt((int32_t)nativeStart, TRUE, status);
16486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
16496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        MatchAt(nativeStart, TRUE, status);
16506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fMatch;
16526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
16536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
16576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
16586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    pattern
16596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
16606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
16616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst RegexPattern &RegexMatcher::pattern() const {
16626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *fPattern;
16636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
16646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
16686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
16696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    region
16706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
16716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
16726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status) {
16736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
16746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
16756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (regionStart>regionLimit || regionStart<0 || regionLimit<0) {
16786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ILLEGAL_ARGUMENT_ERROR;
16796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t nativeStart = regionStart;
16826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t nativeLimit = regionLimit;
16836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (nativeStart > fInputLength || nativeLimit > fInputLength) {
16846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      status = U_ILLEGAL_ARGUMENT_ERROR;
16856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
16866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (startIndex == -1)
16886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      this->reset();
16896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else
16906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      resetPreserveRegion();
16916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRegionStart = nativeStart;
16936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRegionLimit = nativeLimit;
16946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fActiveStart = nativeStart;
16956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fActiveLimit = nativeLimit;
16966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (startIndex != -1) {
16986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      if (startIndex < fActiveStart || startIndex > fActiveLimit) {
16996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          status = U_INDEX_OUTOFBOUNDS_ERROR;
17006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      }
17016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      fMatchEnd = startIndex;
17026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!fTransparentBounds) {
17056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLookStart = nativeStart;
17066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLookLimit = nativeLimit;
17076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fAnchoringBounds) {
17096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fAnchorStart = nativeStart;
17106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fAnchorLimit = nativeLimit;
17116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
17136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::region(int64_t start, int64_t limit, UErrorCode &status) {
17166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return region(start, limit, -1, status);
17176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
17206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    regionEnd
17226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
17246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::regionEnd() const {
17256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)fRegionLimit;
17266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::regionEnd64() const {
17296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fRegionLimit;
17306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
17336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    regionStart
17356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
17376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::regionStart() const {
17386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)fRegionStart;
17396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::regionStart64() const {
17426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fRegionStart;
17436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
17476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    replaceAll
17496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
17516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString RegexMatcher::replaceAll(const UnicodeString &replacement, UErrorCode &status) {
17526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText replacementText = UTEXT_INITIALIZER;
17536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText resultText = UTEXT_INITIALIZER;
17546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString resultString;
17556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
17566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return resultString;
17576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openConstUnicodeString(&replacementText, &replacement, &status);
17606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUnicodeString(&resultText, &resultString, &status);
17616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    replaceAll(&replacementText, &resultText, status);
17636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&resultText);
17656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&replacementText);
17666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return resultString;
17686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    replaceAll, UText mode
17736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::replaceAll(UText *replacement, UText *dest, UErrorCode &status) {
17756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
17766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
17776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
17796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
17806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
17816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (dest == NULL) {
17846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString emptyString;
17856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText empty = UTEXT_INITIALIZER;
17866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUnicodeString(&empty, &emptyString, &status);
17886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dest = utext_clone(NULL, &empty, TRUE, FALSE, &status);
17896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&empty);
17906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(status)) {
17936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        reset();
17946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (find()) {
17956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            appendReplacement(dest, replacement, status);
17966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_FAILURE(status)) {
17976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
17986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
17996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
18006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        appendTail(dest, status);
18016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
18046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
18086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    replaceFirst
18106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
18126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString RegexMatcher::replaceFirst(const UnicodeString &replacement, UErrorCode &status) {
18136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText replacementText = UTEXT_INITIALIZER;
18146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText resultText = UTEXT_INITIALIZER;
18156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString resultString;
18166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openConstUnicodeString(&replacementText, &replacement, &status);
18186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openUnicodeString(&resultText, &resultString, &status);
18196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    replaceFirst(&replacementText, &resultText, status);
18216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&resultText);
18236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&replacementText);
18246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return resultString;
18266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    replaceFirst, UText mode
18306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::replaceFirst(UText *replacement, UText *dest, UErrorCode &status) {
18326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
18336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
18346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
18366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
18376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
18386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
18416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!find()) {
18426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return getInput(dest, status);
18436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (dest == NULL) {
18466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString emptyString;
18476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText empty = UTEXT_INITIALIZER;
18486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_openUnicodeString(&empty, &emptyString, &status);
18506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dest = utext_clone(NULL, &empty, TRUE, FALSE, &status);
18516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&empty);
18526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    appendReplacement(dest, replacement, status);
18556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    appendTail(dest, status);
18566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
18586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
18626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     requireEnd
18646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
18666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::requireEnd() const {
18676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fRequireEnd;
18686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
18726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     reset
18746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
18766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::reset() {
18776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRegionStart    = 0;
18786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRegionLimit    = fInputLength;
18796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fActiveStart    = 0;
18806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fActiveLimit    = fInputLength;
18816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAnchorStart    = 0;
18826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAnchorLimit    = fInputLength;
18836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLookStart      = 0;
18846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLookLimit      = fInputLength;
18856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    resetPreserveRegion();
18866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
18876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::resetPreserveRegion() {
18926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchStart     = 0;
18936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchEnd       = 0;
18946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLastMatchEnd   = -1;
18956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAppendPosition = 0;
18966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatch          = FALSE;
18976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fHitEnd         = FALSE;
18986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRequireEnd     = FALSE;
18996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTime           = 0;
19006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTickCounter    = TIMER_INITIAL_VALUE;
19016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //resetStack(); // more expensive than it looks...
19026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::reset(const UnicodeString &input) {
19066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputText = utext_openConstUnicodeString(fInputText, &input, &fDeferredStatus);
19076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fPattern->fNeedsAltInput) {
19086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus);
19096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputLength = utext_nativeLength(fInputText);
19116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
19136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fInput;
19146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInput = NULL;
19156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Do the following for any UnicodeString.
19176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  This is for compatibility for those clients who modify the input string "live" during regex operations.
19186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputUniStrMaybeMutable = TRUE;
19196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fWordBreakItr != NULL) {
19216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_BREAK_ITERATION==0
19226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
19236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fWordBreakItr->setText(fInputText, status);
19246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
19256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
19276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::reset(UText *input) {
19316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fInputText != input) {
19326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInputText = utext_clone(fInputText, input, FALSE, TRUE, &fDeferredStatus);
19336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fPattern->fNeedsAltInput) fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus);
19346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInputLength = utext_nativeLength(fInputText);
19356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete fInput;
19376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInput = NULL;
19386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fWordBreakItr != NULL) {
19406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_BREAK_ITERATION==0
19416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UErrorCode status = U_ZERO_ERROR;
19426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fWordBreakItr->setText(input, status);
19436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
19446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
19456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
19476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputUniStrMaybeMutable = FALSE;
19486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
19506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RegexMatcher &RegexMatcher::reset(const UChar *) {
19536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fDeferredStatus = U_INTERNAL_PROGRAM_ERROR;
19546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
19556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/
19566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) {
19586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
19596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
19606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();       // Reset also resets the region to be the entire string.
19626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (position < 0 || position > fActiveLimit) {
19646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
19656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
19666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchEnd = position;
19686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
19696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
19736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
//    refreshInputText
19756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
19766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
19776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::refreshInputText(UText *input, UErrorCode &status) {
19786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
19796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
19806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (input == NULL) {
19826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ILLEGAL_ARGUMENT_ERROR;
19836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
19846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (utext_nativeLength(fInputText) != utext_nativeLength(input)) {
19866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ILLEGAL_ARGUMENT_ERROR;
19876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
19886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t  pos = utext_getNativeIndex(fInputText);
19906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Shallow read-only clone of the new UText into the existing input UText
19916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInputText = utext_clone(fInputText, input, FALSE, TRUE, &status);
19926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
19936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
19946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_setNativeIndex(fInputText, pos);
19966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fAltInputText != NULL) {
19986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pos = utext_getNativeIndex(fAltInputText);
19996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fAltInputText = utext_clone(fAltInputText, input, FALSE, TRUE, &status);
20006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
20016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return *this;
20026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_setNativeIndex(fAltInputText, pos);
20046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
20066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
20076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
20116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    setTrace
20136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
20156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setTrace(UBool state) {
20166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTraceDebug = state;
20176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
20186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------
20226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   split
20246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------------
20266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t  RegexMatcher::split(const UnicodeString &input,
20276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString    dest[],
20286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t          destCapacity,
20296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode      &status)
20306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
20316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText inputText = UTEXT_INITIALIZER;
20326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openConstUnicodeString(&inputText, &input, &status);
20336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
20346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
20356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText **destText = (UText **)uprv_malloc(sizeof(UText*)*destCapacity);
20386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (destText == NULL) {
20396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_MEMORY_ALLOCATION_ERROR;
20406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
20416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
20436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i = 0; i < destCapacity; i++) {
20446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        destText[i] = utext_openUnicodeString(NULL, &dest[i], &status);
20456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t fieldCount = split(&inputText, destText, destCapacity, status);
20486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i = 0; i < destCapacity; i++) {
20506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(destText[i]);
20516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_free(destText);
20546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_close(&inputText);
20556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fieldCount;
20566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
20576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   split, UText mode
20606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t  RegexMatcher::split(UText *input,
20626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UText           *dest[],
20636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t          destCapacity,
20646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode      &status)
20656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
20666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
20676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Check arguements for validity
20686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
20696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
20706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
20716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    };
20726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (destCapacity < 1) {
20746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ILLEGAL_ARGUMENT_ERROR;
20756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
20766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
20796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Reset for the input text
20806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
20816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset(input);
20826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t   nextOutputStringStart = 0;
20836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fActiveLimit == 0) {
20846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
20856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
20886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Loop through the input text, searching for the delimiter pattern
20896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
20906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
20916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t numCaptureGroups = fPattern->fGroupMap->size();
20926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=0; ; i++) {
20936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (i>=destCapacity-1) {
20946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // There is one or zero output string left.
20956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Fill the last output string with whatever is left from the input, then exit the loop.
20966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  ( i will be == destCapacity if we filled the output array while processing
20976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    capture groups of the delimiter expression, in which case we will discard the
20986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    last capture group saved in favor of the unprocessed remainder of the
20996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    input string.)
21006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            i = destCapacity-1;
21016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fActiveLimit > nextOutputStringStart) {
21026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
21036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (dest[i]) {
21046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
21056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                      input->chunkContents+nextOutputStringStart,
21066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                      (int32_t)(fActiveLimit-nextOutputStringStart), &status);
21076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
21086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UText remainingText = UTEXT_INITIALIZER;
21096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
21106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         fActiveLimit-nextOutputStringStart, &status);
21116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
21126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_close(&remainingText);
21136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
21146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
21156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UErrorCode lengthStatus = U_ZERO_ERROR;
21166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t remaining16Length =
21176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus);
21186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
21196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (remainingChars == NULL) {
21206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        status = U_MEMORY_ALLOCATION_ERROR;
21216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
21226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
21236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status);
21256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (dest[i]) {
21266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
21276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
21286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UText remainingText = UTEXT_INITIALIZER;
21296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
21306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
21316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_close(&remainingText);
21326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
21336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    uprv_free(remainingChars);
21356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
21366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
21376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
21386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
21396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (find()) {
21406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // We found another delimiter.  Move everything from where we started looking
21416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  up until the start of the delimiter into the next output string.
21426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
21436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (dest[i]) {
21446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
21456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                  input->chunkContents+nextOutputStringStart,
21466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                  (int32_t)(fMatchStart-nextOutputStringStart), &status);
21476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
21486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UText remainingText = UTEXT_INITIALIZER;
21496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
21506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                      fMatchStart-nextOutputStringStart, &status);
21516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
21526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_close(&remainingText);
21536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
21546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
21556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UErrorCode lengthStatus = U_ZERO_ERROR;
21566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fMatchStart, NULL, 0, &lengthStatus);
21576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
21586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (remainingChars == NULL) {
21596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    status = U_MEMORY_ALLOCATION_ERROR;
21606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
21616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
21626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                utext_extract(input, nextOutputStringStart, fMatchStart, remainingChars, remaining16Length+1, &status);
21636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (dest[i]) {
21646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
21656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
21666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UText remainingText = UTEXT_INITIALIZER;
21676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
21686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
21696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_close(&remainingText);
21706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
21716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uprv_free(remainingChars);
21736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
21746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            nextOutputStringStart = fMatchEnd;
21756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // If the delimiter pattern has capturing parentheses, the captured
21776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  text goes out into the next n destination strings.
21786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t groupNum;
21796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
21806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (i >= destCapacity-2) {
21816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Never fill the last available output string with capture group text.
21826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // It will filled with the last field, the remainder of the
21836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  unsplit input text.
21846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
21856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
21866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                i++;
21876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                dest[i] = group(groupNum, dest[i], status);
21886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
21896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (nextOutputStringStart == fActiveLimit) {
21916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // The delimiter was at the end of the string.  We're done, but first
21926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // we output one last empty string, for the empty field following
21936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   the delimiter at the end of input.
21946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (i+1 < destCapacity) {
21956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++i;
21966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (dest[i] == NULL) {
21976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        dest[i] = utext_openUChars(NULL, NULL, 0, &status);
21986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
21996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        static UChar emptyString[] = {(UChar)0};
22006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]), emptyString, 0, &status);
22016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
22026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
22036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
22046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
22066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else
22086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
22096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // We ran off the end of the input while looking for the next delimiter.
22106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // All the remaining text goes into the current output string.
22116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
22126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (dest[i]) {
22136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
22146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                  input->chunkContents+nextOutputStringStart,
22156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                  (int32_t)(fActiveLimit-nextOutputStringStart), &status);
22166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
22176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UText remainingText = UTEXT_INITIALIZER;
22186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
22196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                     fActiveLimit-nextOutputStringStart, &status);
22206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
22216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_close(&remainingText);
22226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
22236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
22246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UErrorCode lengthStatus = U_ZERO_ERROR;
22256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus);
22266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
22276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (remainingChars == NULL) {
22286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    status = U_MEMORY_ALLOCATION_ERROR;
22296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
22306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
22316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status);
22336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (dest[i]) {
22346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
22356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
22366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UText remainingText = UTEXT_INITIALIZER;
22376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
22386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
22396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utext_close(&remainingText);
22406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
22416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uprv_free(remainingChars);
22436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
22446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
22456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
22476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
22486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }   // end of for loop
22506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return i+1;
22516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
22526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
22556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     start
22576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
22596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::start(UErrorCode &status) const {
22606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return start(0, status);
22616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
22626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::start64(UErrorCode &status) const {
22646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return start64(0, status);
22656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
22666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
22686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     start(int32_t group, UErrorCode &status)
22706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
22726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::start64(int32_t group, UErrorCode &status) const {
22746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
22756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -1;
22766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
22786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
22796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -1;
22806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatch == FALSE) {
22826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_REGEX_INVALID_STATE;
22836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -1;
22846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (group < 0 || group > fPattern->fGroupMap->size()) {
22866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INDEX_OUTOFBOUNDS_ERROR;
22876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return -1;
22886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t s;
22906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (group == 0) {
22916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = fMatchStart;
22926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
22936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1);
22946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset < fPattern->fFrameSize);
22956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(groupOffset >= 0);
22966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = fFrame->fExtra[groupOffset];
22976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return s;
23006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::start(int32_t group, UErrorCode &status) const {
23046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)start64(group, status);
23056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     useAnchoringBounds
23106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::useAnchoringBounds(UBool b) {
23136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAnchoringBounds = b;
23146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAnchorStart = (fAnchoringBounds ? fRegionStart : 0);
23156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fAnchorLimit = (fAnchoringBounds ? fRegionLimit : fInputLength);
23166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
23176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     useTransparentBounds
23236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::useTransparentBounds(UBool b) {
23266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTransparentBounds = b;
23276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLookStart = (fTransparentBounds ? 0 : fRegionStart);
23286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLookLimit = (fTransparentBounds ? fInputLength : fRegionLimit);
23296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
23306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     setTimeLimit
23356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setTimeLimit(int32_t limit, UErrorCode &status) {
23386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
23396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
23406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
23426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
23436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
23446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (limit < 0) {
23466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ILLEGAL_ARGUMENT_ERROR;
23476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
23486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTimeLimit = limit;
23506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     getTimeLimit
23566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::getTimeLimit() const {
23596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fTimeLimit;
23606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     setStackLimit
23666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
23686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setStackLimit(int32_t limit, UErrorCode &status) {
23696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
23706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
23716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(fDeferredStatus)) {
23736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = fDeferredStatus;
23746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
23756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (limit < 0) {
23776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_ILLEGAL_ARGUMENT_ERROR;
23786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
23796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Reset the matcher.  This is needed here in case there is a current match
23826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    whose final stack frame (containing the match results, pointed to by fFrame)
23836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    would be lost by resizing to a smaller stack size.
23846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
23856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (limit == 0) {
23876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Unlimited stack expansion
23886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fStack->setMaxCapacity(0);
23896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
23906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Change the units of the limit  from bytes to ints, and bump the size up
23916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   to be big enough to hold at least one stack frame for the pattern,
23926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   if it isn't there already.
23936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t adjustedLimit = limit / sizeof(int32_t);
23946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (adjustedLimit < fPattern->fFrameSize) {
23956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            adjustedLimit = fPattern->fFrameSize;
23966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
23976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fStack->setMaxCapacity(adjustedLimit);
23986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStackLimit = limit;
24006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
24016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
24046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     getStackLimit
24066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
24086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::getStackLimit() const {
24096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fStackLimit;
24106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
24116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
24146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     setMatchCallback
24166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
24186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setMatchCallback(URegexMatchCallback     *callback,
24196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                    const void              *context,
24206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                    UErrorCode              &status) {
24216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
24226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
24236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
24246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fCallbackFn = callback;
24256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fCallbackContext = context;
24266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
24276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
24306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     getMatchCallback
24326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
24346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::getMatchCallback(URegexMatchCallback   *&callback,
24356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                  const void              *&context,
24366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                  UErrorCode              &status) {
24376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
24386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       return;
24396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
24406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    callback = fCallbackFn;
24416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    context  = fCallbackContext;
24426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
24436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//--------------------------------------------------------------------------------
//
//     setFindProgressCallback
//
//--------------------------------------------------------------------------------
24506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setFindProgressCallback(URegexFindProgressCallback      *callback,
24516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                const void                      *context,
24526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                UErrorCode                      &status) {
24536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
24546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
24556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
24566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFindProgressCallbackFn = callback;
24576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFindProgressCallbackContext = context;
24586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
24596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//--------------------------------------------------------------------------------
//
//     getFindProgressCallback
//
//--------------------------------------------------------------------------------
24666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::getFindProgressCallback(URegexFindProgressCallback    *&callback,
24676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                const void                    *&context,
24686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                UErrorCode                    &status) {
24696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
24706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       return;
24716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
24726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    callback = fFindProgressCallbackFn;
24736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    context  = fFindProgressCallbackContext;
24746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
24756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//================================================================================
24786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    Code following this point in this file is the internal
24806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    Match Engine Implementation.
24816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//================================================================================
24836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
24866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
24876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   resetStack
24886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//           Discard any previous contents of the state save stack, and initialize a
24896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//           new stack frame to all -1.  The -1s are needed for capture group limits,
24906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//           where they indicate that a group has not yet matched anything.
24916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
24926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgREStackFrame *RegexMatcher::resetStack() {
24936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Discard any previous contents of the state save stack, and initialize a
24946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  new stack frame with all -1 data.  The -1s are needed for capture group limits,
24956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  where they indicate that a group has not yet matched anything.
24966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStack->removeAllElements();
24976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrameSize, fDeferredStatus);
24996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
25006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) {
25016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        iFrame->fExtra[i] = -1;
25026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
25036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return iFrame;
25046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
25056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
25096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
25106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   isWordBoundary
25116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     in perl, "xab..cd..", \b is true at positions 0,3,5,7
25126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     For us,
25136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       If the current char is a combining mark,
25146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          \b is FALSE.
//                       Else scan backwards to the first non-combining char.
//                            We are at a boundary if this char and the original char are
//                               opposite in membership in the \w set.
25186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
25196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//          parameters:   pos   - the current position in the input buffer
25206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
25216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//              TODO:  double-check edge cases at region boundaries.
25226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
25236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
25246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::isWordBoundary(int64_t pos) {
25256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool isBoundary = FALSE;
25266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool cIsWord    = FALSE;
25276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (pos >= fLookLimit) {
25296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fHitEnd = TRUE;
25306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
25316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Determine whether char c at current position is a member of the word set of chars.
25326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // If we're off the end of the string, behave as though we're not at a word char.
25336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UTEXT_SETNATIVEINDEX(fInputText, pos);
25346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32  c = UTEXT_CURRENT32(fInputText);
25356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) {
25366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Current char is a combining one.  Not a boundary.
25376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
25386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
25396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c);
25406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
25416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Back up until we come to a non-combining char, determine whether
25436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  that char is a word char.
25446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool prevCIsWord = FALSE;
25456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
25466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (UTEXT_GETNATIVEINDEX(fInputText) <= fLookStart) {
25476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
25496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 prevChar = UTEXT_PREVIOUS32(fInputText);
25506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND)
25516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              || u_charType(prevChar) == U_FORMAT_CHAR)) {
25526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar);
25536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
25556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
25566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isBoundary = cIsWord ^ prevCIsWord;
25576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return isBoundary;
25586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
25596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::isChunkWordBoundary(int32_t pos) {
25616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool isBoundary = FALSE;
25626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool cIsWord    = FALSE;
25636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *inputBuf = fInputText->chunkContents;
25656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (pos >= fLookLimit) {
25676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fHitEnd = TRUE;
25686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
25696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Determine whether char c at current position is a member of the word set of chars.
25706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // If we're off the end of the string, behave as though we're not at a word char.
25716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c;
25726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U16_GET(inputBuf, fLookStart, pos, fLookLimit, c);
25736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) {
25746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Current char is a combining one.  Not a boundary.
25756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
25766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
25776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c);
25786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
25796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Back up until we come to a non-combining char, determine whether
25816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  that char is a word char.
25826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool prevCIsWord = FALSE;
25836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
25846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (pos <= fLookStart) {
25856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
25876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 prevChar;
25886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U16_PREV(inputBuf, fLookStart, pos, prevChar);
25896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND)
25906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              || u_charType(prevChar) == U_FORMAT_CHAR)) {
25916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar);
25926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
25946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
25956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isBoundary = cIsWord ^ prevCIsWord;
25966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return isBoundary;
25976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
25986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
26006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   isUWordBoundary
26026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//         Test for a word boundary using RBBI word break.
26046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//          parameters:   pos   - the current position in the input buffer
26066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
26086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::isUWordBoundary(int64_t pos) {
26096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool       returnVal = FALSE;
26106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_BREAK_ITERATION==0
26116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // If we haven't yet created a break iterator for this matcher, do it now.
26136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fWordBreakItr == NULL) {
26146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fWordBreakItr =
26156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), fDeferredStatus);
26166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(fDeferredStatus)) {
26176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
26186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
26196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fWordBreakItr->setText(fInputText, fDeferredStatus);
26206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
26216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (pos >= fLookLimit) {
26236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fHitEnd = TRUE;
26246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        returnVal = TRUE;   // With Unicode word rules, only positions within the interior of "real"
26256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            //    words are not boundaries.  All non-word chars stand by themselves,
26266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            //    with word boundaries on both sides.
26276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
26286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!UTEXT_USES_U16(fInputText)) {
26296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // !!!: Would like a better way to do this!
26306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UErrorCode status = U_ZERO_ERROR;
26316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            pos = utext_extract(fInputText, 0, pos, NULL, 0, &status);
26326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
26336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        returnVal = fWordBreakItr->isBoundary((int32_t)pos);
26346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
26356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
26366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return   returnVal;
26376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
26386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
26406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   IncrementTime     This function is called once each TIMER_INITIAL_VALUE state
26426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     saves. Increment the "time" counter, and call the
26436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     user callback function if there is one installed.
26446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     If the match operation needs to be aborted, either for a time-out
26466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     or because the user callback asked for it, just set an error status.
26476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     The engine will pick that up and stop in its outer loop.
26486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
26506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::IncrementTime(UErrorCode &status) {
26516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTickCounter = TIMER_INITIAL_VALUE;
26526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTime++;
26536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fCallbackFn != NULL) {
26546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((*fCallbackFn)(fCallbackContext, fTime) == FALSE) {
26556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_REGEX_STOPPED_BY_CALLER;
26566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
26576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
26586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
26596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fTimeLimit > 0 && fTime >= fTimeLimit) {
26606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_REGEX_TIME_OUT;
26616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
26626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
26636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
26656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   ReportFindProgress     This function is called once for each advance in the target
26676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          string from the find() function, and calls the user progress callback
26686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          function if there is one installed.
26696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          NOTE:
26716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          If the match operation needs to be aborted because the user
26736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          callback asked for it, just set an error status.
26746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          The engine will pick that up and stop in its outer loop.
26756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
26776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::ReportFindProgress(int64_t matchIndex, UErrorCode &status) {
26786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fFindProgressCallbackFn != NULL) {
26796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((*fFindProgressCallbackFn)(fFindProgressCallbackContext, matchIndex) == FALSE) {
26806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status = U_ZERO_ERROR /*U_REGEX_STOPPED_BY_CALLER*/;
26816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
26826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
26836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
26846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
26856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
26866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
26886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   StateSave
26906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       Make a new stack frame, initialized as a copy of the current stack frame.
26916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       Set the pattern index in the original stack frame from the operand value
26926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       in the opcode.  Execution of the engine continues with the state in
26936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       the newly created stack frame
26946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       Note that reserveBlock() may grow the stack, resulting in the
26966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       whole thing being relocated in memory.
26976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
26986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    Parameters:
//       fp           The top frame pointer when called.  At return, a new
//                    frame will be present
27016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       savePatIdx   An index into the compiled pattern.  Goes into the original
27026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                    (not new) frame.  If execution ever back-tracks out of the
27036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                    new frame, this will be where we continue from in the pattern.
27046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    Return
27056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                    The new frame pointer.
27066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
27076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
27086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orginline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status) {
27096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // push storage for a new frame.
27106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t *newFP = fStack->reserveBlock(fFrameSize, status);
27116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (newFP == NULL) {
27126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Failure on attempted stack expansion.
27136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Stack function set some other error code, change it to a more
27146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   specific one for regular expressions.
27156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_REGEX_STACK_OVERFLOW;
27166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We need to return a writable stack frame, so just return the
27176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    previous frame.  The match operation will stop quickly
27186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    because of the error status, after which the frame will never
27196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    be looked at again.
27206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return fp;
27216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
27226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fp = (REStackFrame *)(newFP - fFrameSize);  // in case of realloc of stack.
27236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // New stack frame = copy of old top frame.
27256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t *source = (int64_t *)fp;
27266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t *dest   = newFP;
27276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
27286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *dest++ = *source++;
27296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (source == newFP) {
27306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
27316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
27326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
27336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fTickCounter--;
27356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fTickCounter <= 0) {
27366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       IncrementTime(status);    // Re-initializes fTickCounter
27376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
27386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fp->fPatIdx = savePatIdx;
27396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (REStackFrame *)newFP;
27406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
27416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//--------------------------------------------------------------------------------
//
//   MatchAt      This is the actual matching engine.
//
//                  startIdx:    begin matching at this index.
//                  toEnd:       if true, match must extend to end of the input region
//
//--------------------------------------------------------------------------------
27516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
27526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool       isMatch  = FALSE;      // True if the we have a match.
27536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t     backSearchIndex = U_INT64_MAX; // used after greedy single-character matches for searching backwards
27556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     op;                    // Operation from the compiled pattern, split into
27576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     opType;                //    the opcode
27586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     opValue;               //    and the operand value.
27596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    #ifdef REGEX_RUN_DEBUG
27616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fTraceDebug)
27626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
27636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("MatchAt(startIdx=%ld)\n", startIdx);
27646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("Original Pattern: ");
27656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c = utext_next32From(fPattern->fPattern, 0);
27666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (c != U_SENTINEL) {
27676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c<32 || c>256) {
27686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = '.';
27696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
27706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_DUMP_DEBUG_PRINTF(("%c", c));
27716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = UTEXT_NEXT32(fPattern->fPattern);
27736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
27746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("\n");
27756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("Input String: ");
27766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c = utext_next32From(fInputText, 0);
27776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (c != U_SENTINEL) {
27786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c<32 || c>256) {
27796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = '.';
27806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
27816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            printf("%c", c);
27826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = UTEXT_NEXT32(fInputText);
27846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
27856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("\n");
27866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("\n");
27876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
27886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    #endif
27896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
27916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
27926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
27936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Cache frequently referenced items from the compiled pattern
27956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
27966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t             *pat           = fPattern->fCompiledPat->getBuffer();
27976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar         *litText       = fPattern->fLiteralText.getBuffer();
27996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector             *sets          = fPattern->fSets;
28006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFrameSize = fPattern->fFrameSize;
28026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REStackFrame        *fp            = resetStack();
28036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fp->fPatIdx   = 0;
28056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fp->fInputIdx = startIdx;
28066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Zero out the pattern's static data
28086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
28096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i = 0; i<fPattern->fDataSize; i++) {
28106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fData[i] = 0;
28116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
28126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
28146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Main loop for interpreting the compiled pattern.
28156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  One iteration of the loop per pattern operation performed.
28166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
28176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
28186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
28196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (_heapchk() != _HEAPOK) {
28206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fprintf(stderr, "Heap Trouble\n");
28216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
28226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
28236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        op      = (int32_t)pat[fp->fPatIdx];
28256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        opType  = URX_TYPE(op);
28266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        opValue = URX_VAL(op);
28276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        #ifdef REGEX_RUN_DEBUG
28286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fTraceDebug) {
28296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
28306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            printf("inputIdx=%ld   inputChar=%x   sp=%3ld   activeLimit=%ld  ", fp->fInputIdx,
28316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
28326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fPattern->dumpOp(fp->fPatIdx);
28336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
28346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        #endif
28356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fp->fPatIdx++;
28366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        switch (opType) {
28386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_NOP:
28416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
28426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKTRACK:
28456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Force a backtrack.  In some circumstances, the pattern compiler
28466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   will notice that the pattern can't possibly match anything, and will
28476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   emit one of these at that point.
28486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
28496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
28506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR:
28536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx < fActiveLimit) {
28546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
28556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
28566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c == opValue) {
28576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
28586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
28596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
28606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
28616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
28626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
28636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
28646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
28656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING:
28686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
28696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Test input against a literal string.
28706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Strings require two slots in the compiled pattern, one for the
28716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   offset to the string text, and one for the length.
28726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t   stringStartIdx = opValue;
28746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op      = (int32_t)pat[fp->fPatIdx];     // Fetch the second operand
28756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx++;
28766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                opType    = URX_TYPE(op);
28776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stringLen = URX_VAL(op);
28786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opType == URX_STRING_LEN);
28796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stringLen >= 2);
28806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UChar *patternString = litText+stringStartIdx;
28826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t patternStringIndex = 0;
28836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
28846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 inputChar;
28856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 patternChar;
28866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = TRUE;
28876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                while (patternStringIndex < stringLen) {
28886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (UTEXT_GETNATIVEINDEX(fInputText) >= fActiveLimit) {
28896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
28906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
28916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
28926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
28936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    inputChar = UTEXT_NEXT32(fInputText);
28946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U16_NEXT(patternString, patternStringIndex, stringLen, patternChar);
28956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (patternChar != inputChar) {
28966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
28976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
28986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
28996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
29006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
29026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
29036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
29046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
29056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
29066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
29076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATE_SAVE:
29116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = StateSave(fp, opValue, status);
29126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END:
29166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The match loop will exit via this path on a successful match,
29176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   when we reach the end of the pattern.
29186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (toEnd && fp->fInputIdx != fActiveLimit) {
29196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // The pattern matched, but not to the end of input.  Try some more.
29206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
29216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
29226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
29236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = TRUE;
29246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto  breakFromLoop;
29256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Start and End Capture stack frame variables are laid out out like this:
29276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  fp->fExtra[opValue]  - The start of a completed capture group
29286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //             opValue+1 - The end   of a completed capture group
29296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //             opValue+2 - the start of a capture group whose end
29306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //                          has not yet been reached (and might not ever be).
29316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_START_CAPTURE:
29326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
29336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fExtra[opValue+2] = fp->fInputIdx;
29346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END_CAPTURE:
29386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
29396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(fp->fExtra[opValue+2] >= 0);            // Start pos for this group must be set.
29406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fExtra[opValue]   = fp->fExtra[opValue+2];   // Tentative start becomes real.
29416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fExtra[opValue+1] = fp->fInputIdx;           // End position
29426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]);
29436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR:                   //  $, test for End of line
29476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                           //     or for position before new line at end of input
29486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
29496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fAnchorLimit) {
29506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We really are at the end of input.  Success.
29516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
29526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRequireEnd = TRUE;
29536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
29546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
29556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
29576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If we are positioned just before a new-line that is located at the
29596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   end of input, succeed.
29606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
29616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) {
29626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) {
29636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // If not in the middle of a CR/LF sequence
29646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) {
29656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            // At new-line at end of input. Success
29666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            fHitEnd = TRUE;
29676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            fRequireEnd = TRUE;
29686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
29706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
29716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
29726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
29736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 nextC = UTEXT_NEXT32(fInputText);
29746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (c == 0x0d && nextC == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) {
29756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
29766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fRequireEnd = TRUE;
29776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;                         // At CR/LF at end of input.  Success
29786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
29796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
29806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
29826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
29836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         case URX_DOLLAR_D:                   //  $, test for End of Line, in UNIX_LINES mode.
29876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fAnchorLimit) {
29886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Off the end of input.  Success.
29896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
29906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRequireEnd = TRUE;
29916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
29926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
29936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
29946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
29956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Either at the last character of input, or off the end.
29966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) == fAnchorLimit) {
29976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
29986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRequireEnd = TRUE;
29996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
30006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
30016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
30026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Not at end of input.  Back-track out.
30046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
30056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
30066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         case URX_DOLLAR_M:                //  $, test for End of line in multi-line mode
30096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             {
30106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 if (fp->fInputIdx >= fAnchorLimit) {
30116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     // We really are at the end of input.  Success.
30126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     fHitEnd = TRUE;
30136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     fRequireEnd = TRUE;
30146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     break;
30156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 }
30166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 // If we are positioned just before a new-line, succeed.
30176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 // It makes no difference where the new-line is within the input.
30186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
30196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 UChar32 c = UTEXT_CURRENT32(fInputText);
30206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) {
30216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
30226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     //  In multi-line mode, hitting a new-line just before the end of input does not
30236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     //   set the hitEnd or requireEnd flags
30246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && UTEXT_PREVIOUS32(fInputText)==0x0d)) {
30256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
30266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     }
30276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 }
30286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 // not at a new line.  Fail.
30296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 fp = (REStackFrame *)fStack->popFrame(fFrameSize);
30306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             }
30316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             break;
30326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         case URX_DOLLAR_MD:                //  $, test for End of line in multi-line and UNIX_LINES mode
30356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             {
30366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 if (fp->fInputIdx >= fAnchorLimit) {
30376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     // We really are at the end of input.  Success.
30386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     fHitEnd = TRUE;
30396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     fRequireEnd = TRUE;  // Java set requireEnd in this case, even though
30406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     break;               //   adding a new-line would not lose the match.
30416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 }
30426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 // If we are not positioned just before a new-line, the test fails; backtrack out.
30436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 // It makes no difference where the new-line is within the input.
30446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
30456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 if (UTEXT_CURRENT32(fInputText) != 0x0a) {
30466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);
30476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 }
30486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             }
30496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             break;
30506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       case URX_CARET:                    //  ^, test for start of line
30536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx != fAnchorStart) {
30546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
30556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
30566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
30576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       case URX_CARET_M:                   //  ^, test for start of line in multi-line mode
30606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           {
30616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               if (fp->fInputIdx == fAnchorStart) {
30626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   // We are at the start of input.  Success.
30636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   break;
30646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               }
30656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               // Check whether character just before the current pos is a new-line
30666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               //   unless we are at the end of input
30676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
30686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               UChar32  c = UTEXT_PREVIOUS32(fInputText);
30696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               if ((fp->fInputIdx < fAnchorLimit) &&
30706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
30716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   //  It's a new-line.  ^ is true.  Success.
30726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   //  TODO:  what should be done with positions between a CR and LF?
30736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   break;
30746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               }
30756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               // Not at the start of a line.  Fail.
30766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               fp = (REStackFrame *)fStack->popFrame(fFrameSize);
30776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           }
30786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           break;
30796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       case URX_CARET_M_UNIX:       //  ^, test for start of line in multi-line + Unix-line mode
30826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           {
30836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               U_ASSERT(fp->fInputIdx >= fAnchorStart);
30846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               if (fp->fInputIdx <= fAnchorStart) {
30856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   // We are at the start of input.  Success.
30866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   break;
30876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               }
30886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               // Check whether character just before the current pos is a new-line
30896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               U_ASSERT(fp->fInputIdx <= fAnchorLimit);
30906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
30916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               UChar32  c = UTEXT_PREVIOUS32(fInputText);
30926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               if (c != 0x0a) {
30936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   // Not at the start of a line.  Back-track out.
30946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   fp = (REStackFrame *)fStack->popFrame(fFrameSize);
30956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               }
30966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           }
30976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           break;
30986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_B:          // Test for word boundaries
31006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
31016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = isWordBoundary(fp->fInputIdx);
31026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                success ^= (UBool)(opValue != 0);     // flip sense for \B
31036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (!success) {
31046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
31066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
31076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
31086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_BU:          // Test for word boundaries, Unicode-style
31116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
31126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = isUWordBoundary(fp->fInputIdx);
31136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                success ^= (UBool)(opValue != 0);     // flip sense for \B
31146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (!success) {
31156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
31176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
31186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
31196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_D:            // Test for decimal digit
31226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
31236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
31246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
31256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
31276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
31286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
31306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
31326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int8_t ctype = u_charType(c);     // TODO:  make a unicode set for this.  Will be faster.
31336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER);
31346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                success ^= (UBool)(opValue != 0);        // flip sense for \D
31356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
31366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
31376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
31386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
31406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
31416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
31426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_G:          // Test for position at end of previous match
31456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) {
31466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
31486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
31496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_X:
31526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  Match a Grapheme, as defined by Unicode TR 29.
31536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  Differs slightly from Perl, which consumes combining marks independently
31546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    of context.
31556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
31566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fail if at end of input
31586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
31596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
31606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
31626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
31636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
31656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Examine (and consume) the current char.
31676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Dispatch into a little state machine, based on the char.
31686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32  c;
31696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_NEXT32(fInputText);
31706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
31716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeSet **sets = fPattern->fStaticSets;
31726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_NORMAL]->contains(c))  goto GC_Extend;
31736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control;
31746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_L]->contains(c))       goto GC_L;
31756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_LV]->contains(c))      goto GC_V;
31766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
31776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_V]->contains(c))       goto GC_V;
31786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_T]->contains(c))       goto GC_T;
31796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto GC_Extend;
31806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_L:
31846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
31856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_NEXT32(fInputText);
31866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
31876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_L]->contains(c))       goto GC_L;
31886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_LV]->contains(c))      goto GC_V;
31896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
31906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_V]->contains(c))       goto GC_V;
31916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (void)UTEXT_PREVIOUS32(fInputText);
31926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
31936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto GC_Extend;
31946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_V:
31966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
31976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_NEXT32(fInputText);
31986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
31996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_V]->contains(c))       goto GC_V;
32006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_T]->contains(c))       goto GC_T;
32016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (void)UTEXT_PREVIOUS32(fInputText);
32026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
32036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto GC_Extend;
32046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_T:
32066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
32076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_NEXT32(fInputText);
32086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
32096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_T]->contains(c))       goto GC_T;
32106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (void)UTEXT_PREVIOUS32(fInputText);
32116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
32126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto GC_Extend;
32136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Extend:
32156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Combining characters are consumed here
32166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
32176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (fp->fInputIdx >= fActiveLimit) {
32186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
32196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
32206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c = UTEXT_CURRENT32(fInputText);
32216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (sets[URX_GC_EXTEND]->contains(c) == FALSE) {
32226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
32236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
32246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    (void)UTEXT_NEXT32(fInputText);
32256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
32266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
32276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                goto GC_Done;
32286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Control:
32306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Most control chars stand alone (don't combine with combining chars),
32316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   except that a CR/LF sequence is a single grapheme cluster.
32326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c == 0x0d && fp->fInputIdx < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
32336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c = UTEXT_NEXT32(fInputText);
32346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
32356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
32366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Done:
32386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
32396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
32406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
32416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
32426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
32436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_Z:          // Test for end of Input
32486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx < fAnchorLimit) {
32496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
32506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
32516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
32526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRequireEnd = TRUE;
32536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
32546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATIC_SETREF:
32596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
32606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Test input character against one of the predefined sets
32616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    (Word Characters, for example)
32626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // The high bit of the op value is a flag for the match polarity.
32636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    0:   success if input char is in set.
32646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    1:   success if input char is not in set.
32656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
32666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
32676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
32686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
32696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
32706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET);
32726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                opValue &= ~URX_NEG_SET;
32736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
32746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
32766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
32776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c < 256) {
32786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
32796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s8->contains(c)) {
32806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = !success;
32816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
32826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
32836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
32846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s->contains(c)) {
32856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = !success;
32866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
32876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
32886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
32896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
32906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
32916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // the character wasn't in the set.
32926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
32936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
32946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
32956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STAT_SETREF_N:
32996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
33006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Test input character for NOT being a member of  one of
33016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    the predefined sets (Word Characters, for example)
33026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
33036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
33046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
33056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
33066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
33096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
33116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
33136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c < 256) {
33146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
33156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s8->contains(c) == FALSE) {
33166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
33176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
33186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
33196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
33206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
33216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s->contains(c) == FALSE) {
33226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
33236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
33246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
33256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // the character wasn't in the set.
33276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
33286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
33306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_SETREF:
33336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fActiveLimit) {
33346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
33356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
33366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
33376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
33386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
33396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // There is input left.  Pick up one char and test it for set membership.
33416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
33426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < sets->size());
33436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c<256) {
33446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    Regex8BitSet *s8 = &fPattern->fSets8[opValue];
33456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s8->contains(c)) {
33466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
33476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
33486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
33496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
33506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
33516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s->contains(c)) {
33526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // The character is in the set.  A Match.
33536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
33546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
33556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
33566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // the character wasn't in the set.
33596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
33606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
33626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY:
33656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
33666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // . matches anything, but stops at end-of-line.
33676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
33686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // At end of input.  Match failed.  Backtrack out.
33696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
33706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
33716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
33726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
33756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // There is input left.  Advance over one char, unless we've hit end-of-line
33776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
33786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
33796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
33806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // End of line in normal mode.   . does not match.
33816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
33826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
33836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
33856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
33876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_ALL:
33906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
33916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // ., in dot-matches-all (including new lines) mode
33926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
33936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // At end of input.  Match failed.  Backtrack out.
33946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
33956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
33966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
33976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
34006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // There is input left.  Advance over one char, except if we are
34026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   at a cr/lf, advance over both of them.
34036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c;
34046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = UTEXT_NEXT32(fInputText);
34056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
34066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c==0x0d && fp->fInputIdx < fActiveLimit) {
34076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // In the case of a CR/LF, we need to advance over both.
34086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 nextc = UTEXT_CURRENT32(fInputText);
34096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (nextc == 0x0a) {
34106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (void)UTEXT_NEXT32(fInputText);
34116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
34126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
34136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
34146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
34156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
34166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_UNIX:
34196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
34206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // '.' operator, matches all, but stops at end-of-line.
34216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   UNIX_LINES mode, so 0x0a is the only recognized line ending.
34226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
34236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // At end of input.  Match failed.  Backtrack out.
34246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
34256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
34266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
34276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
34286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
34306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // There is input left.  Advance over one char, unless we've hit end-of-line
34326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
34336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c == 0x0a) {
34346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // End of line in normal mode.   '.' does not match the \n
34356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
34366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
34376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
34386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
34396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
34406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
34416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP:
34446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fPatIdx = opValue;
34456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
34466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_FAIL:
34486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = FALSE;
34496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto breakFromLoop;
34506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV:
34526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(opValue < fPattern->fCompiledPat->size());
34536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = StateSave(fp, fp->fPatIdx, status);       // State save to loc following current
34546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fPatIdx = opValue;                         // Then JMP.
34556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
34566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV_X:
34586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // This opcode is used with (x)+, when x can match a zero length string.
34596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Same as JMP_SAV, except conditional on the match having made forward progress.
34606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the
34616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   data address of the input position at the start of the loop.
34626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
34636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size());
34646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  stoOp = (int32_t)pat[opValue-1];
34656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC);
34666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  frameLoc = URX_VAL(stoOp);
34676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize);
34686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t prevInputIdx = fp->fExtra[frameLoc];
34696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(prevInputIdx <= fp->fInputIdx);
34706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (prevInputIdx < fp->fInputIdx) {
34716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // The match did make progress.  Repeat the loop.
34726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = StateSave(fp, fp->fPatIdx, status);  // State save to loc following current
34736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = opValue;
34746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fExtra[frameLoc] = fp->fInputIdx;
34756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
34766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If the input position did not advance, we do nothing here,
34776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   execution will fall out of the loop.
34786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
34796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
34806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT:
34826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
34836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
34846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[opValue] = 0;                 //  Set the loop counter variable to zero
34856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Pick up the three extra operands that CTR_INIT has, and
34876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    skip the pattern location counter past
34886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
34896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx += 3;
34906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
34916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
34926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
34936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minCount>=0);
34946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(maxCount>=minCount || maxCount==-1);
34956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(loopLoc>=fp->fPatIdx);
34966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (minCount == 0) {
34986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = StateSave(fp, loopLoc+1, status);
34996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (maxCount == -1) {
35016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fExtra[opValue+1] = fp->fInputIdx;   //  For loop breaking.
35026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else if (maxCount == 0) {
35036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
35046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
35076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP:
35096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
35106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
35116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t initOp = (int32_t)pat[opValue];
35126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT);
35136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
35146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minCount  = (int32_t)pat[opValue+2];
35156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxCount  = (int32_t)pat[opValue+3];
35166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (*pCounter)++;
35176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
35186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(*pCounter == maxCount);
35196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
35206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*pCounter >= minCount) {
35226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (maxCount == -1) {
35236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // Loop has no hard upper bound.
35246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // Check that it is progressing through the input, break if it is not.
35256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        int64_t *pLastInputIdx =  &fp->fExtra[URX_VAL(initOp) + 1];
35266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (fp->fInputIdx == *pLastInputIdx) {
35276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
35286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        } else {
35296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            *pLastInputIdx = fp->fInputIdx;
35306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
35316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
35326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = StateSave(fp, fp->fPatIdx, status);
35336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx = opValue + 4;    // Loop back.
35356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
35376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT_NG:
35396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
35406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Initialize a non-greedy loop
35416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
35426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[opValue] = 0;                 //  Set the loop counter variable to zero
35436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Pick up the three extra operands that CTR_INIT_NG has, and
35456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    skip the pattern location counter past
35466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
35476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx += 3;
35486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
35496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
35506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
35516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minCount>=0);
35526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(maxCount>=minCount || maxCount==-1);
35536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(loopLoc>fp->fPatIdx);
35546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (maxCount == -1) {
35556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fExtra[opValue+1] = fp->fInputIdx;   //  Save initial input index for loop breaking.
35566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (minCount == 0) {
35596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (maxCount != 0) {
35606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp = StateSave(fp, fp->fPatIdx, status);
35616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
35626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = loopLoc+1;   // Continue with stuff after repeated block
35636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
35666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP_NG:
35686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
35696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Non-greedy {min, max} loops
35706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
35716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t initOp = (int32_t)pat[opValue];
35726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG);
35736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
35746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minCount  = (int32_t)pat[opValue+2];
35756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxCount  = (int32_t)pat[opValue+3];
35766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (*pCounter)++;
35786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
35796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // The loop has matched the maximum permitted number of times.
35806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   Break out of here with no action.  Matching will
35816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   continue with the following pattern.
35826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(*pCounter == maxCount);
35836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
35846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*pCounter < minCount) {
35876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We haven't met the minimum number of matches yet.
35886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   Loop back for another one.
35896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = opValue + 4;    // Loop back.
35906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
35916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We do have the minimum number of matches.
35926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // If there is no upper bound on the loop iterations, check that the input index
35946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // is progressing, and stop the loop if it is not.
35956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (maxCount == -1) {
35966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        int64_t *pLastInputIdx =  &fp->fExtra[URX_VAL(initOp) + 1];
35976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (fp->fInputIdx == *pLastInputIdx) {
35986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
35996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
36006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *pLastInputIdx = fp->fInputIdx;
36016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
36026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Loop Continuation: we will fall into the pattern following the loop
36046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   (non-greedy, don't execute loop body first), but first do
36056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   a state save to the top of the loop, so that a match failure
36066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   in the following pattern will try another iteration of the loop.
36076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = StateSave(fp, opValue + 4, status);
36086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
36096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
36106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
36116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_SP:
36136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
36146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fData[opValue] = fStack->size();
36156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
36166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LD_SP:
36186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
36196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
36206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t newStackSize = (int32_t)fData[opValue];
36216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(newStackSize <= fStack->size());
36226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
36236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (newFP == (int64_t *)fp) {
36246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
36256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
36266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t i;
36276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (i=0; i<fFrameSize; i++) {
36286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    newFP[i] = ((int64_t *)fp)[i];
36296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
36306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)newFP;
36316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fStack->setSize(newStackSize);
36326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
36336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
36346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF:
36366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
36376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue < fFrameSize);
36386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t groupStartIdx = fp->fExtra[opValue];
36396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t groupEndIdx   = fp->fExtra[opValue+1];
36406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(groupStartIdx <= groupEndIdx);
36416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (groupStartIdx < 0) {
36426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // This capture group has not participated in the match thus far,
36436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
36446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
36456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
36466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fAltInputText, groupStartIdx);
36476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
36486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Note: if the capture group match was of an empty string the backref
36506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //         match succeeds.  Verified by testing:  Perl matches succeed
36516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //         in this case, so we do too.
36526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = TRUE;
36546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
36556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (utext_getNativeIndex(fAltInputText) >= groupEndIdx) {
36566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = TRUE;
36576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
36586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
36596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (utext_getNativeIndex(fInputText) >= fActiveLimit) {
36606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
36616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
36626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
36636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
36646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 captureGroupChar = utext_next32(fAltInputText);
36656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 inputChar = utext_next32(fInputText);
36666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputChar != captureGroupChar) {
36676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
36686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
36696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
36706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
36716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
36736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
36746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
36756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
36766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
36776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
36786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
36796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF_I:
36836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
36846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue < fFrameSize);
36856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t groupStartIdx = fp->fExtra[opValue];
36866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t groupEndIdx   = fp->fExtra[opValue+1];
36876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(groupStartIdx <= groupEndIdx);
36886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (groupStartIdx < 0) {
36896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // This capture group has not participated in the match thus far,
36906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
36916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
36926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
36936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                utext_setNativeIndex(fAltInputText, groupStartIdx);
36946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                utext_setNativeIndex(fInputText, fp->fInputIdx);
36956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                CaseFoldingUTextIterator captureGroupItr(*fAltInputText);
36966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                CaseFoldingUTextIterator inputItr(*fInputText);
36976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Note: if the capture group match was of an empty string the backref
36996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //         match succeeds.  Verified by testing:  Perl matches succeed
37006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //         in this case, so we do too.
37016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = TRUE;
37036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
37046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (!captureGroupItr.inExpansion() && utext_getNativeIndex(fAltInputText) >= groupEndIdx) {
37056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = TRUE;
37066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
37076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
37086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (!inputItr.inExpansion() && utext_getNativeIndex(fInputText) >= fActiveLimit) {
37096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
37106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
37116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
37126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
37136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 captureGroupChar = captureGroupItr.next();
37146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 inputChar = inputItr.next();
37156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputChar != captureGroupChar) {
37166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
37176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
37186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
37196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
37206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success && inputItr.inExpansion()) {
37226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We obtained a match by consuming part of a string obtained from
37236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // case-folding a single code point of the input text.
37246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // This does not count as an overall match.
37256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    success = FALSE;
37266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
37276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
37296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
37306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
37316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
37326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
37336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
37356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
37366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_INP_LOC:
37386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
37396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue >= 0 && opValue < fFrameSize);
37406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[opValue] = fp->fInputIdx;
37416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
37426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
37436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMPX:
37456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
37466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
37476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx += 1;
37486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t dataLoc  = URX_VAL(pat[instrOperandLoc]);
37496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize);
37506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t savedInputIdx = fp->fExtra[dataLoc];
37516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(savedInputIdx <= fp->fInputIdx);
37526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (savedInputIdx < fp->fInputIdx) {
37536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = opValue;                               // JMP
37546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
37556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no progress in loop.
37566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
37576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
37586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
37596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_START:
37616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
37626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Entering a lookahead block.
37636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save Stack Ptr, Input Pos.
37646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
37656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue]   = fStack->size();
37666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue+1] = fp->fInputIdx;
37676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveStart     = fLookStart;          // Set the match region change for
37686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit     = fLookLimit;          //   transparent bounds.
37696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
37706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
37716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_END:
37736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
37746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Leaving a look-ahead block.
37756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  restore Stack Ptr, Input Pos to positions they had on entry to block.
37766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
37776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stackSize = fStack->size();
37786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t newStackSize =(int32_t)fData[opValue];
37796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stackSize >= newStackSize);
37806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (stackSize > newStackSize) {
37816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Copy the current top frame back to the new (cut back) top frame.
37826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   This makes the capture groups from within the look-ahead
37836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   expression available.
37846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
37856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t i;
37866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    for (i=0; i<fFrameSize; i++) {
37876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        newFP[i] = ((int64_t *)fp)[i];
37886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
37896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)newFP;
37906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fStack->setSize(newStackSize);
37916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
37926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = fData[opValue+1];
37936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Restore the active region bounds in the input string; they may have
37956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    been changed because of transparent bounds on a Region.
37966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveStart = fRegionStart;
37976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit = fRegionLimit;
37986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
37996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
38006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR_I:
38026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Case insensitive one char.  The char from the pattern is already case folded.
38036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Input text is not, but case folding the input can not reduce two or more code
38046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // points to one.
38056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx < fActiveLimit) {
38066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
38076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = UTEXT_NEXT32(fInputText);
38096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
38106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
38116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
38126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
38136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
38146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
38156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
38166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
38186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
38196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_I:
38216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
38226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Case-insensitive test input against a literal string.
38236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Strings require two slots in the compiled pattern, one for the
38246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   offset to the string text, and one for the length.
38256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   The compiled string has already been case folded.
38266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
38276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    const UChar *patternString = litText + opValue;
38286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t      patternStringIdx  = 0;
38296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    op      = (int32_t)pat[fp->fPatIdx];
38316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx++;
38326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    opType  = URX_TYPE(op);
38336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    opValue = URX_VAL(op);
38346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(opType == URX_STRING_LEN);
38356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t patternStringLen = opValue;  // Length of the string from the pattern.
38366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32   cPattern;
38396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32   cText;
38406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UBool     success = TRUE;
38416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
38436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    CaseFoldingUTextIterator inputIterator(*fInputText);
38446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    while (patternStringIdx < patternStringLen) {
38456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (!inputIterator.inExpansion() && UTEXT_GETNATIVEINDEX(fInputText) >= fActiveLimit) {
38466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            success = FALSE;
38476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            fHitEnd = TRUE;
38486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
38496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
38506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        U16_NEXT(patternString, patternStringIdx, patternStringLen, cPattern);
38516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cText = inputIterator.next();
38526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (cText != cPattern) {
38536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            success = FALSE;
38546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
38556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
38566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
38576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputIterator.inExpansion()) {
38586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
38596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
38606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (success) {
38626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
38636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
38646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
38656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
38666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
38676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
38686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
38696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_START:
38716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
38726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Entering a look-behind block.
38736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save Stack Ptr, Input Pos.
38746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   TODO:  implement transparent bounds.  Ticket #6067
38756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
38766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue]   = fStack->size();
38776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue+1] = fp->fInputIdx;
38786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Init the variable containing the start index for attempted matches.
38796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue+2] = -1;
38806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save input string length, then reset to pin any matches to end at
38816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   the current position.
38826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue+3] = fActiveLimit;
38836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit     = fp->fInputIdx;
38846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
38856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
38866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_CONT:
38896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
38906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Positive Look-Behind, at top of loop checking for matches of LB expression
38916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    at all possible input starting positions.
38926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetch the min and max possible match lengths.  They are the operands
38946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   of this op in the pattern.
38956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minML = (int32_t)pat[fp->fPatIdx++];
38966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxML = (int32_t)pat[fp->fPatIdx++];
38976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minML <= maxML);
38986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minML >= 0);
38996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetch (from data) the last input index where a match was attempted.
39016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
39026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t  *lbStartIdx = &fData[opValue+2];
39036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*lbStartIdx < 0) {
39046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // First time through loop.
39056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *lbStartIdx = fp->fInputIdx - minML;
39066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
39076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // 2nd through nth time through the loop.
39086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Back up start position for match by one.
39096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (*lbStartIdx == 0) {
39106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (*lbStartIdx)--;
39116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
39126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx);
39136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (void)UTEXT_PREVIOUS32(fInputText);
39146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
39156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
39166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
39176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
39196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We have tried all potential match starting points without
39206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  getting a match.  Backtrack out, and out of the
39216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   Look Behind altogether.
39226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
39236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int64_t restoreInputLen = fData[opValue+3];
39246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(restoreInputLen >= fActiveLimit);
39256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(restoreInputLen <= fInputLength);
39266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fActiveLimit = restoreInputLen;
39276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
39286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
39296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
39316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      (successful match will fall off the end of the loop.)
39326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx-3, status);
39336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = *lbStartIdx;
39346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
39356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
39366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_END:
39386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // End of a look-behind block, after a successful match.
39396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
39406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
39416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx != fActiveLimit) {
39426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  The look-behind expression matched, but the match did not
39436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //    extend all the way to the point that we are looking behind from.
39446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  FAIL out of here, which will take us back to the LB_CONT, which
39456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //     will retry the match starting at another position or fail
39466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //     the look-behind altogether, whichever is appropriate.
39476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
39486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
39496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
39506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Look-behind match is good.  Restore the original input string length,
39526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   which had been truncated to pin the end of the lookbehind match to the
39536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   position being looked-behind.
39546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t originalInputLen = fData[opValue+3];
39556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(originalInputLen >= fActiveLimit);
39566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(originalInputLen <= fInputLength);
39576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit = originalInputLen;
39586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
39596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
39606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_CONT:
39636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
39646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Negative Look-Behind, at top of loop checking for matches of LB expression
39656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    at all possible input starting positions.
39666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetch the extra parameters of this op.
39686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minML       = (int32_t)pat[fp->fPatIdx++];
39696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxML       = (int32_t)pat[fp->fPatIdx++];
39706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t continueLoc = (int32_t)pat[fp->fPatIdx++];
39716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        continueLoc = URX_VAL(continueLoc);
39726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minML <= maxML);
39736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minML >= 0);
39746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(continueLoc > fp->fPatIdx);
39756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetch (from data) the last input index where a match was attempted.
39776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
39786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t  *lbStartIdx = &fData[opValue+2];
39796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*lbStartIdx < 0) {
39806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // First time through loop.
39816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *lbStartIdx = fp->fInputIdx - minML;
39826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
39836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // 2nd through nth time through the loop.
39846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Back up start position for match by one.
39856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (*lbStartIdx == 0) {
39866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (*lbStartIdx)--;
39876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
39886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx);
39896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (void)UTEXT_PREVIOUS32(fInputText);
39906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
39916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
39926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
39936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
39956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We have tried all potential match starting points without
39966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  getting a match, which means that the negative lookbehind as
39976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  a whole has succeeded.  Jump forward to the continue location
39986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int64_t restoreInputLen = fData[opValue+3];
39996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(restoreInputLen >= fActiveLimit);
40006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(restoreInputLen <= fInputLength);
40016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fActiveLimit = restoreInputLen;
40026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = continueLoc;
40036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
40046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
40056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    Save state to this URX_LBN_CONT op, so failure to match will repeat the loop.
40076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      (successful match will cause a FAIL out of the loop altogether.)
40086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx-4, status);
40096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = *lbStartIdx;
40106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
40116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
40126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_END:
40146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // End of a negative look-behind block, after a successful match.
40156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
40166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
40176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx != fActiveLimit) {
40186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  The look-behind expression matched, but the match did not
40196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //    extend all the way to the point that we are looking behind from.
40206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  FAIL out of here, which will take us back to the LB_CONT, which
40216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //     will retry the match starting at another position or succeed
40226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //     the look-behind altogether, whichever is appropriate.
40236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
40246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
40256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
40266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Look-behind expression matched, which means look-behind test as
40286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   a whole Fails
40296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Restore the original input string length, which had been truncated
40316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   in order to pin the end of the lookbehind match
40326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   to the position being looked-behind.
40336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t originalInputLen = fData[opValue+3];
40346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(originalInputLen >= fActiveLimit);
40356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(originalInputLen <= fInputLength);
40366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit = originalInputLen;
40376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Restore original stack position, discarding any state saved
40396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   by the successful pattern match.
40406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
40416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t newStackSize = (int32_t)fData[opValue];
40426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(fStack->size() > newStackSize);
40436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fStack->setSize(newStackSize);
40446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  FAIL, which will take control back to someplace
40466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  prior to entering the look-behind test.
40476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
40486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
40496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
40506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_SR_I:
40536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Loop Initialization for the optimized implementation of
40546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //     [some character set]*
40556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   This op scans through all matching input.
40566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
40576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
40586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < sets->size());
40596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                Regex8BitSet *s8 = &fPattern->fSets8[opValue];
40606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeSet   *s  = (UnicodeSet *)sets->elementAt(opValue);
40616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Loop through input, until either the input is exhausted or
40636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   we reach a character that is not a member of the set.
40646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t ix = fp->fInputIdx;
40656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, ix);
40666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
40676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (ix >= fActiveLimit) {
40686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
40696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
40706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
40716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 c = UTEXT_NEXT32(fInputText);
40726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (c<256) {
40736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (s8->contains(c) == FALSE) {
40746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
40756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
40766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
40776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (s->contains(c) == FALSE) {
40786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
40796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
40806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
40816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ix = UTEXT_GETNATIVEINDEX(fInputText);
40826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
40836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If there were no matching characters, skip over the loop altogether.
40856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   The loop doesn't run at all, a * op always succeeds.
40866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (ix == fp->fInputIdx) {
40876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
40886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
40896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
40906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
40926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   must follow.  Its operand is the stack location
40936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   that holds the starting input index for the match of this [set]*
40946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
40956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
40966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stackLoc = URX_VAL(loopcOp);
40976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
40986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[stackLoc] = fp->fInputIdx;
40996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = ix;
41006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save State to the URX_LOOP_C op that follows this one,
41026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   so that match failures in the following code will return to there.
41036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
41046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx, status);
41056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx++;
41066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
41076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
41086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_DOT_I:
41116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Loop Initialization for the optimized implementation of .*
41126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   This op scans through all remaining input.
41136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
41146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
41156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Loop through input until the input is exhausted (we reach an end-of-line)
41166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // In DOTALL mode, we can just go straight to the end of the input.
41176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t ix;
41186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((opValue & 1) == 1) {
41196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Dot-matches-All mode.  Jump straight to the end of the string.
41206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ix = fActiveLimit;
41216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
41226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
41236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // NOT DOT ALL mode.  Line endings do not match '.'
41246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Scan forward until a line ending or end of input.
41256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ix = fp->fInputIdx;
41266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UTEXT_SETNATIVEINDEX(fInputText, ix);
41276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    for (;;) {
41286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (ix >= fActiveLimit) {
41296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            fHitEnd = TRUE;
41306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
41316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
41326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UChar32 c = UTEXT_NEXT32(fInputText);
41336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if ((c & 0x7f) <= 0x29) {          // Fast filter of non-new-line-s
41346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if ((c == 0x0a) ||             //  0x0a is newline in both modes.
41356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                               (((opValue & 2) == 0) &&    // IF not UNIX_LINES mode
41366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                    (c<=0x0d && c>=0x0a)) || c==0x85 ||c==0x2028 || c==0x2029) {
41376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                //  char is a line ending.  Exit the scanning loop.
41386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                break;
41396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
41406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
41416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ix = UTEXT_GETNATIVEINDEX(fInputText);
41426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
41436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
41446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If there were no matching characters, skip over the loop altogether.
41466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   The loop doesn't run at all, a * op always succeeds.
41476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (ix == fp->fInputIdx) {
41486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
41496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
41506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
41516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
41536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   must follow.  Its operand is the stack location
41546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   that holds the starting input index for the match of this .*
41556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
41566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
41576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stackLoc = URX_VAL(loopcOp);
41586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
41596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[stackLoc] = fp->fInputIdx;
41606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = ix;
41616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save State to the URX_LOOP_C op that follows this one,
41636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   so that match failures in the following code will return to there.
41646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
41656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx, status);
41666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx++;
41676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
41686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
41696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_C:
41726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
41736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue<fFrameSize);
41746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                backSearchIndex = fp->fExtra[opValue];
41756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(backSearchIndex <= fp->fInputIdx);
41766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (backSearchIndex == fp->fInputIdx) {
41776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We've backed up the input idx to the point that the loop started.
41786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // The loop is done.  Leave here without saving state.
41796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  Subsequent failures won't come back here.
41806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
41816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
41826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Set up for the next iteration of the loop, with input index
41836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   backed up by one from the last time through,
41846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   and a state save to this instruction in case the following code fails again.
41856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   (We're going backwards because this loop emulates stack unwinding, not
41866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    the initial scan forward.)
41876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(fp->fInputIdx > 0);
41886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
41896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 prevC = UTEXT_PREVIOUS32(fInputText);
41906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
41916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 twoPrevC = UTEXT_PREVIOUS32(fInputText);
41936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (prevC == 0x0a &&
41946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx > backSearchIndex &&
41956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    twoPrevC == 0x0d) {
41966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t prevOp = (int32_t)pat[fp->fPatIdx-2];
41976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) {
41986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // .*, stepping back over CRLF pair.
41996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
42006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
42016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
42026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx-1, status);
42056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
42076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        default:
42116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Trouble.  The compiled pattern contains an entry with an
42126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //           unrecognized type tag.
42136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(FALSE);
42146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
42176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = FALSE;
42186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
42196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
42216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbreakFromLoop:
42236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatch = isMatch;
42246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (isMatch) {
42256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLastMatchEnd = fMatchEnd;
42266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fMatchStart   = startIdx;
42276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fMatchEnd     = fp->fInputIdx;
42286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fTraceDebug) {
42296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_RUN_DEBUG_PRINTF(("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd));
42306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
42326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else
42336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
42346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fTraceDebug) {
42356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
42366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
42386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFrame = fp;                // The active stack frame when the engine stopped.
42406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                //   Contains the capture group results that we need to
42416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                //    access later.
42426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return;
42436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
42446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
42476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
42486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   MatchChunkAt   This is the actual matching engine. Like MatchAt, but with the
42496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  assumption that the entire string is available in the UText's
42506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  chunk buffer. For now, that means we can use int32_t indexes,
42516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  except for anything that needs to be saved (like group starts
42526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  and ends).
42536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
42546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  startIdx:    begin matching at this index.
42556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                  toEnd:       if true, match must extend to end of the input region
42566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
42576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------------------
42586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
42596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool       isMatch  = FALSE;      // True if the we have a match.
42606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     backSearchIndex = INT32_MAX; // used after greedy single-character matches for searching backwards
42626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     op;                    // Operation from the compiled pattern, split into
42646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     opType;                //    the opcode
42656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t     opValue;               //    and the operand value.
42666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifdef REGEX_RUN_DEBUG
42686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fTraceDebug)
42696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
42706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("MatchAt(startIdx=%d)\n", startIdx);
42716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("Original Pattern: ");
42726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c = utext_next32From(fPattern->fPattern, 0);
42736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (c != U_SENTINEL) {
42746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c<32 || c>256) {
42756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = '.';
42766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_DUMP_DEBUG_PRINTF(("%c", c));
42786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = UTEXT_NEXT32(fPattern->fPattern);
42806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("\n");
42826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("Input String: ");
42836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c = utext_next32From(fInputText, 0);
42846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (c != U_SENTINEL) {
42856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c<32 || c>256) {
42866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = '.';
42876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            printf("%c", c);
42896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c = UTEXT_NEXT32(fInputText);
42916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("\n");
42936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("\n");
42946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
42956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
42966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
42986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
42996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
43006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Cache frequently referenced items from the compiled pattern
43026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t             *pat           = fPattern->fCompiledPat->getBuffer();
43046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar         *litText       = fPattern->fLiteralText.getBuffer();
43066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector             *sets          = fPattern->fSets;
43076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar         *inputBuf      = fInputText->chunkContents;
43096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFrameSize = fPattern->fFrameSize;
43116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    REStackFrame        *fp            = resetStack();
43126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fp->fPatIdx   = 0;
43146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fp->fInputIdx = startIdx;
43156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Zero out the pattern's static data
43176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
43186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i = 0; i<fPattern->fDataSize; i++) {
43196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fData[i] = 0;
43206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
43216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Main loop for interpreting the compiled pattern.
43246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  One iteration of the loop per pattern operation performed.
43256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
43266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
43276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
43286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (_heapchk() != _HEAPOK) {
43296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fprintf(stderr, "Heap Trouble\n");
43306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
43316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
43326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        op      = (int32_t)pat[fp->fPatIdx];
43346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        opType  = URX_TYPE(op);
43356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        opValue = URX_VAL(op);
43366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifdef REGEX_RUN_DEBUG
43376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fTraceDebug) {
43386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
43396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            printf("inputIdx=%ld   inputChar=%x   sp=%3ld   activeLimit=%ld  ", fp->fInputIdx,
43406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
43416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fPattern->dumpOp(fp->fPatIdx);
43426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
43436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
43446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fp->fPatIdx++;
43456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        switch (opType) {
43476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_NOP:
43506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
43516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKTRACK:
43546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Force a backtrack.  In some circumstances, the pattern compiler
43556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   will notice that the pattern can't possibly match anything, and will
43566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   emit one of these at that point.
43576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
43586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
43596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR:
43626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx < fActiveLimit) {
43636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c;
43646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
43656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c == opValue) {
43666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
43676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
43686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
43696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
43706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
43716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
43726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
43736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING:
43766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
43776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Test input against a literal string.
43786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Strings require two slots in the compiled pattern, one for the
43796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   offset to the string text, and one for the length.
43806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t   stringStartIdx = opValue;
43816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t   stringLen;
43826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op      = (int32_t)pat[fp->fPatIdx];     // Fetch the second operand
43846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx++;
43856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                opType    = URX_TYPE(op);
43866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                stringLen = URX_VAL(op);
43876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opType == URX_STRING_LEN);
43886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stringLen >= 2);
43896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UChar * pInp = inputBuf + fp->fInputIdx;
43916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UChar * pInpLimit = inputBuf + fActiveLimit;
43926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UChar * pPat = litText+stringStartIdx;
43936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UChar * pEnd = pInp + stringLen;
43946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = TRUE;
43956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                while (pInp < pEnd) {
43966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (pInp >= pInpLimit) {
43976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
43986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
43996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
44006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
44016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (*pInp++ != *pPat++) {
44026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
44036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
44046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
44056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
44066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
44086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx += stringLen;
44096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
44106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
44116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
44126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
44136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
44146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATE_SAVE:
44176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = StateSave(fp, opValue, status);
44186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
44196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END:
44226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The match loop will exit via this path on a successful match,
44236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   when we reach the end of the pattern.
44246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (toEnd && fp->fInputIdx != fActiveLimit) {
44256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // The pattern matched, but not to the end of input.  Try some more.
44266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
44276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
44286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
44296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = TRUE;
44306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto  breakFromLoop;
44316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Start and End Capture stack frame variables are laid out out like this:
44336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  fp->fExtra[opValue]  - The start of a completed capture group
44346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //             opValue+1 - The end   of a completed capture group
44356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //             opValue+2 - the start of a capture group whose end
44366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //                          has not yet been reached (and might not ever be).
44376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_START_CAPTURE:
44386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
44396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fExtra[opValue+2] = fp->fInputIdx;
44406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
44416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END_CAPTURE:
44446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
44456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(fp->fExtra[opValue+2] >= 0);            // Start pos for this group must be set.
44466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fExtra[opValue]   = fp->fExtra[opValue+2];   // Tentative start becomes real.
44476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fExtra[opValue+1] = fp->fInputIdx;           // End position
44486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]);
44496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
44506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR:                   //  $, test for End of line
44536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //     or for position before new line at end of input
44546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx < fAnchorLimit-2) {
44556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // We are no where near the end of input.  Fail.
44566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   This is the common case.  Keep it first.
44576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
44586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
44596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
44606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fAnchorLimit) {
44616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // We really are at the end of input.  Success.
44626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
44636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRequireEnd = TRUE;
44646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
44656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
44666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // If we are positioned just before a new-line that is located at the
44686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   end of input, succeed.
44696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx == fAnchorLimit-1) {
44706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c;
44716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c);
44726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) {
44746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) {
44756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // At new-line at end of input. Success
44766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
44776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fRequireEnd = TRUE;
44786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
44796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
44806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
44816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if (fp->fInputIdx == fAnchorLimit-2 &&
44826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                inputBuf[fp->fInputIdx]==0x0d && inputBuf[fp->fInputIdx+1]==0x0a) {
44836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
44846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRequireEnd = TRUE;
44856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;                         // At CR/LF at end of input.  Success
44866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
44876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
44896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
44916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
44936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_D:                   //  $, test for End of Line, in UNIX_LINES mode.
44946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fAnchorLimit-1) {
44956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Either at the last character of input, or off the end.
44966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx == fAnchorLimit-1) {
44976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // At last char of input.  Success if it's a new line.
44986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputBuf[fp->fInputIdx] == 0x0a) {
44996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
45006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fRequireEnd = TRUE;
45016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
45026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
45036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
45046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Off the end of input.  Success.
45056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
45066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRequireEnd = TRUE;
45076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
45086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Not at end of input.  Back-track out.
45126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
45136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
45146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_M:                //  $, test for End of line in multi-line mode
45176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
45186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fAnchorLimit) {
45196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We really are at the end of input.  Success.
45206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
45216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRequireEnd = TRUE;
45226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
45236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If we are positioned just before a new-line, succeed.
45256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // It makes no difference where the new-line is within the input.
45266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c = inputBuf[fp->fInputIdx];
45276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) {
45286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
45296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  In multi-line mode, hitting a new-line just before the end of input does not
45306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   set the hitEnd or requireEnd flags
45316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) {
45326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
45336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
45346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // not at a new line.  Fail.
45366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
45376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
45396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_MD:                //  $, test for End of line in multi-line and UNIX_LINES mode
45426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
45436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fAnchorLimit) {
45446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We really are at the end of input.  Success.
45456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
45466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRequireEnd = TRUE;  // Java set requireEnd in this case, even though
45476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;               //   adding a new-line would not lose the match.
45486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If we are not positioned just before a new-line, the test fails; backtrack out.
45506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // It makes no difference where the new-line is within the input.
45516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (inputBuf[fp->fInputIdx] != 0x0a) {
45526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
45536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
45566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET:                    //  ^, test for start of line
45596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx != fAnchorStart) {
45606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
45616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
45636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M:                   //  ^, test for start of line in mulit-line mode
45666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
45676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx == fAnchorStart) {
45686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We are at the start input.  Success.
45696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
45706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Check whether character just before the current pos is a new-line
45726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   unless we are at the end of input
45736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar  c = inputBuf[fp->fInputIdx - 1];
45746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((fp->fInputIdx < fAnchorLimit) &&
45756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
45766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  It's a new-line.  ^ is true.  Success.
45776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  TODO:  what should be done with positions between a CR and LF?
45786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
45796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Not at the start of a line.  Fail.
45816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
45826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
45836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
45846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
45866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M_UNIX:       //  ^, test for start of line in mulit-line + Unix-line mode
45876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
45886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(fp->fInputIdx >= fAnchorStart);
45896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx <= fAnchorStart) {
45906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We are at the start input.  Success.
45916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
45926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
45936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Check whether character just before the current pos is a new-line
45946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(fp->fInputIdx <= fAnchorLimit);
45956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar  c = inputBuf[fp->fInputIdx - 1];
45966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c != 0x0a) {
45976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Not at the start of a line.  Back-track out.
45986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
45996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
46006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
46026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_B:          // Test for word boundaries
46046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
46056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = isChunkWordBoundary((int32_t)fp->fInputIdx);
46066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                success ^= (UBool)(opValue != 0);     // flip sense for \B
46076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (!success) {
46086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
46096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
46106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
46126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_BU:          // Test for word boundaries, Unicode-style
46156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
46166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = isUWordBoundary(fp->fInputIdx);
46176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                success ^= (UBool)(opValue != 0);     // flip sense for \B
46186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (!success) {
46196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
46206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
46216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
46236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_D:            // Test for decimal digit
46266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
46276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
46286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
46296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
46306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
46316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
46326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c;
46346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
46356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int8_t ctype = u_charType(c);     // TODO:  make a unicode set for this.  Will be faster.
46366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER);
46376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                success ^= (UBool)(opValue != 0);        // flip sense for \D
46386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (!success) {
46396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
46406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
46416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
46436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_G:          // Test for position at end of previous match
46466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) {
46476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
46486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
46506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_X:
46536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Match a Grapheme, as defined by Unicode TR 29.
46546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Differs slightly from Perl, which consumes combining marks independently
46556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    of context.
46566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
46576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Fail if at end of input
46596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fActiveLimit) {
46606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
46616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
46626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
46636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
46646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Examine (and consume) the current char.
46666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   Dispatch into a little state machine, based on the char.
46676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32  c;
46686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
46696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet **sets = fPattern->fStaticSets;
46706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_NORMAL]->contains(c))  goto GC_Extend;
46716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control;
46726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_L]->contains(c))       goto GC_L;
46736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_LV]->contains(c))      goto GC_V;
46746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
46756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_V]->contains(c))       goto GC_V;
46766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_T]->contains(c))       goto GC_T;
46776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto GC_Extend;
46786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_L:
46826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
46836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
46846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_L]->contains(c))       goto GC_L;
46856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_LV]->contains(c))      goto GC_V;
46866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
46876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_V]->contains(c))       goto GC_V;
46886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
46896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto GC_Extend;
46906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_V:
46926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
46936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
46946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_V]->contains(c))       goto GC_V;
46956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_T]->contains(c))       goto GC_T;
46966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
46976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto GC_Extend;
46986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
46996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_T:
47006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
47016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
47026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sets[URX_GC_T]->contains(c))       goto GC_T;
47036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
47046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto GC_Extend;
47056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Extend:
47076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Combining characters are consumed here
47086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (;;) {
47096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
47106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
47116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
47126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
47136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (sets[URX_GC_EXTEND]->contains(c) == FALSE) {
47146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U16_BACK_1(inputBuf, 0, fp->fInputIdx);
47156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
47166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
47176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
47186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto GC_Done;
47196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Control:
47216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Most control chars stand alone (don't combine with combining chars),
47226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   except that a CR/LF sequence is a single grapheme cluster.
47236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (c == 0x0d && fp->fInputIdx < fActiveLimit && inputBuf[fp->fInputIdx] == 0x0a) {
47246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx++;
47256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
47266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Done:
47286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx >= fActiveLimit) {
47296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
47306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
47316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
47326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
47336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_Z:          // Test for end of Input
47386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx < fAnchorLimit) {
47396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
47406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
47416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
47426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRequireEnd = TRUE;
47436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
47446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
47456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATIC_SETREF:
47496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
47506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Test input character against one of the predefined sets
47516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    (Word Characters, for example)
47526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // The high bit of the op value is a flag for the match polarity.
47536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    0:   success if input char is in set.
47546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    1:   success if input char is not in set.
47556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
47566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
47576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
47586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
47596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
47606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET);
47626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                opValue &= ~URX_NEG_SET;
47636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
47646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c;
47666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
47676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c < 256) {
47686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
47696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s8->contains(c)) {
47706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = !success;
47716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
47726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
47736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
47746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s->contains(c)) {
47756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = !success;
47766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
47776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
47786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (!success) {
47796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
47806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
47816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
47826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
47836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STAT_SETREF_N:
47866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
47876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Test input character for NOT being a member of  one of
47886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    the predefined sets (Word Characters, for example)
47896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
47906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
47916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
47926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
47936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
47946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
47966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
47976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32  c;
47986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
47996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c < 256) {
48006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
48016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s8->contains(c) == FALSE) {
48026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
48036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
48046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
48056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
48066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s->contains(c) == FALSE) {
48076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
48086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
48096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
48106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
48116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
48126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
48136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_SETREF:
48166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
48176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
48186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
48196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
48206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
48216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
48226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < sets->size());
48246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // There is input left.  Pick up one char and test it for set membership.
48266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32  c;
48276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
48286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c<256) {
48296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    Regex8BitSet *s8 = &fPattern->fSets8[opValue];
48306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s8->contains(c)) {
48316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // The character is in the set.  A Match.
48326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
48336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
48346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
48356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
48366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (s->contains(c)) {
48376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // The character is in the set.  A Match.
48386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
48396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
48406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
48416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // the character wasn't in the set.
48436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
48446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
48456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
48466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY:
48496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
48506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // . matches anything, but stops at end-of-line.
48516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
48526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // At end of input.  Match failed.  Backtrack out.
48536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
48546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
48556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
48566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
48576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // There is input left.  Advance over one char, unless we've hit end-of-line
48596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32  c;
48606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
48616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
48626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
48636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // End of line in normal mode.   . does not match.
48646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
48656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
48666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
48676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
48686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
48696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_ALL:
48726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
48736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // . in dot-matches-all (including new lines) mode
48746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
48756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // At end of input.  Match failed.  Backtrack out.
48766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
48776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
48786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
48796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
48806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // There is input left.  Advance over one char, except if we are
48826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   at a cr/lf, advance over both of them.
48836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c;
48846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
48856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c==0x0d && fp->fInputIdx < fActiveLimit) {
48866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // In the case of a CR/LF, we need to advance over both.
48876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputBuf[fp->fInputIdx] == 0x0a) {
48886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        U16_FWD_1(inputBuf, fp->fInputIdx, fActiveLimit);
48896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
48906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
48916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
48926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
48936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
48956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_UNIX:
48966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
48976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // '.' operator, matches all, but stops at end-of-line.
48986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   UNIX_LINES mode, so 0x0a is the only recognized line ending.
48996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx >= fActiveLimit) {
49006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // At end of input.  Match failed.  Backtrack out.
49016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
49026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
49036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
49046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
49056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // There is input left.  Advance over one char, unless we've hit end-of-line
49076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c;
49086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
49096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c == 0x0a) {
49106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // End of line in UNIX_LINES mode.   '.' does not match the \n
49116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
49126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
49136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
49146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
49156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP:
49186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fPatIdx = opValue;
49196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
49206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_FAIL:
49226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = FALSE;
49236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            goto breakFromLoop;
49246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV:
49266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(opValue < fPattern->fCompiledPat->size());
49276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = StateSave(fp, fp->fPatIdx, status);       // State save to loc following current
49286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp->fPatIdx = opValue;                         // Then JMP.
49296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
49306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV_X:
49326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // This opcode is used with (x)+, when x can match a zero length string.
49336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Same as JMP_SAV, except conditional on the match having made forward progress.
49346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the
49356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   data address of the input position at the start of the loop.
49366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
49376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size());
49386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  stoOp = (int32_t)pat[opValue-1];
49396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC);
49406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  frameLoc = URX_VAL(stoOp);
49416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize);
49426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t prevInputIdx = (int32_t)fp->fExtra[frameLoc];
49436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(prevInputIdx <= fp->fInputIdx);
49446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (prevInputIdx < fp->fInputIdx) {
49456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // The match did make progress.  Repeat the loop.
49466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = StateSave(fp, fp->fPatIdx, status);  // State save to loc following current
49476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = opValue;
49486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fExtra[frameLoc] = fp->fInputIdx;
49496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
49506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If the input position did not advance, we do nothing here,
49516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   execution will fall out of the loop.
49526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
49536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
49546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT:
49566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
49576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
49586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[opValue] = 0;                 //  Set the loop counter variable to zero
49596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Pick up the three extra operands that CTR_INIT has, and
49616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    skip the pattern location counter past
49626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
49636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx += 3;
49646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
49656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
49666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
49676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minCount>=0);
49686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(maxCount>=minCount || maxCount==-1);
49696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(loopLoc>=fp->fPatIdx);
49706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (minCount == 0) {
49726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = StateSave(fp, loopLoc+1, status);
49736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
49746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (maxCount == -1) {
49756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fExtra[opValue+1] = fp->fInputIdx;   //  For loop breaking.
49766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else if (maxCount == 0) {
49776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
49786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
49796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
49806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
49816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
49826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP:
49836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
49846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
49856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t initOp = (int32_t)pat[opValue];
49866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT);
49876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
49886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minCount  = (int32_t)pat[opValue+2];
49896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxCount  = (int32_t)pat[opValue+3];
49906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (*pCounter)++;
49916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
49926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(*pCounter == maxCount);
49936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
49946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
49956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*pCounter >= minCount) {
49966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (maxCount == -1) {
49976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // Loop has no hard upper bound.
49986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // Check that it is progressing through the input, break if it is not.
49996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        int64_t *pLastInputIdx =  &fp->fExtra[URX_VAL(initOp) + 1];
50006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (fp->fInputIdx == *pLastInputIdx) {
50016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
50026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        } else {
50036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            *pLastInputIdx = fp->fInputIdx;
50046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
50056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
50066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = StateSave(fp, fp->fPatIdx, status);
50076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
50086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx = opValue + 4;    // Loop back.
50096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
50106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
50116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT_NG:
50136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
50146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Initialize a non-greedy loop
50156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
50166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[opValue] = 0;                 //  Set the loop counter variable to zero
50176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Pick up the three extra operands that CTR_INIT_NG has, and
50196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    skip the pattern location counter past
50206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
50216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx += 3;
50226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
50236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
50246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
50256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minCount>=0);
50266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(maxCount>=minCount || maxCount==-1);
50276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(loopLoc>fp->fPatIdx);
50286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (maxCount == -1) {
50296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fExtra[opValue+1] = fp->fInputIdx;   //  Save initial input index for loop breaking.
50306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
50316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (minCount == 0) {
50336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (maxCount != 0) {
50346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fp = StateSave(fp, fp->fPatIdx, status);
50356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
50366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = loopLoc+1;   // Continue with stuff after repeated block
50376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
50386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
50396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
50406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP_NG:
50426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
50436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Non-greedy {min, max} loops
50446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
50456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t initOp = (int32_t)pat[opValue];
50466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG);
50476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
50486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minCount  = (int32_t)pat[opValue+2];
50496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxCount  = (int32_t)pat[opValue+3];
50506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (*pCounter)++;
50526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
50536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // The loop has matched the maximum permitted number of times.
50546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   Break out of here with no action.  Matching will
50556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   continue with the following pattern.
50566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(*pCounter == maxCount);
50576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
50586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
50596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*pCounter < minCount) {
50616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We haven't met the minimum number of matches yet.
50626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   Loop back for another one.
50636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = opValue + 4;    // Loop back.
50646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
50656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We do have the minimum number of matches.
50666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // If there is no upper bound on the loop iterations, check that the input index
50686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // is progressing, and stop the loop if it is not.
50696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (maxCount == -1) {
50706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        int64_t *pLastInputIdx =  &fp->fExtra[URX_VAL(initOp) + 1];
50716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (fp->fInputIdx == *pLastInputIdx) {
50726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
50736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
50746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *pLastInputIdx = fp->fInputIdx;
50756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
50766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Loop Continuation: we will fall into the pattern following the loop
50786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   (non-greedy, don't execute loop body first), but first do
50796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   a state save to the top of the loop, so that a match failure
50806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   in the following pattern will try another iteration of the loop.
50816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = StateSave(fp, opValue + 4, status);
50826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
50836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
50846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
50856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_SP:
50876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
50886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fData[opValue] = fStack->size();
50896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
50906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
50916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LD_SP:
50926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
50936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
50946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t newStackSize = (int32_t)fData[opValue];
50956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(newStackSize <= fStack->size());
50966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
50976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (newFP == (int64_t *)fp) {
50986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
50996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t i;
51016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (i=0; i<fFrameSize; i++) {
51026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    newFP[i] = ((int64_t *)fp)[i];
51036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)newFP;
51056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fStack->setSize(newStackSize);
51066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
51076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
51086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF:
51106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
51116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue < fFrameSize);
51126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t groupStartIdx = fp->fExtra[opValue];
51136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t groupEndIdx   = fp->fExtra[opValue+1];
51146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(groupStartIdx <= groupEndIdx);
51156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t inputIndex = fp->fInputIdx;
51166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (groupStartIdx < 0) {
51176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // This capture group has not participated in the match thus far,
51186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
51196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
51206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = TRUE;
51226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (int64_t groupIndex = groupStartIdx; groupIndex < groupEndIdx; ++groupIndex,++inputIndex) {
51236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputIndex >= fActiveLimit) {
51246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
51256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
51266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
51276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
51286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputBuf[groupIndex] != inputBuf[inputIndex]) {
51296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
51306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
51316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
51326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
51346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = inputIndex;
51356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
51366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
51376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
51396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
51406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF_I:
51426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
51436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue < fFrameSize);
51446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t groupStartIdx = fp->fExtra[opValue];
51456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t groupEndIdx   = fp->fExtra[opValue+1];
51466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(groupStartIdx <= groupEndIdx);
51476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (groupStartIdx < 0) {
51486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // This capture group has not participated in the match thus far,
51496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
51506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
51516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                CaseFoldingUCharIterator captureGroupItr(inputBuf, groupStartIdx, groupEndIdx);
51536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                CaseFoldingUCharIterator inputItr(inputBuf, fp->fInputIdx, fActiveLimit);
51546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Note: if the capture group match was of an empty string the backref
51566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //         match succeeds.  Verified by testing:  Perl matches succeed
51576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //         in this case, so we do too.
51586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool success = TRUE;
51606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
51616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 captureGroupChar = captureGroupItr.next();
51626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (captureGroupChar == U_SENTINEL) {
51636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = TRUE;
51646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
51656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
51666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32 inputChar = inputItr.next();
51676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputChar == U_SENTINEL) {
51686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
51696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
51706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
51716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
51726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (inputChar != captureGroupChar) {
51736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
51746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
51756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
51766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success && inputItr.inExpansion()) {
51796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We obtained a match by consuming part of a string obtained from
51806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // case-folding a single code point of the input text.
51816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // This does not count as an overall match.
51826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    success = FALSE;
51836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
51866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = inputItr.getIndex();
51876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
51886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
51896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
51906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
51916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
51926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
51936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_INP_LOC:
51946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
51956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue >= 0 && opValue < fFrameSize);
51966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[opValue] = fp->fInputIdx;
51976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
51986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
51996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMPX:
52016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
52026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
52036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx += 1;
52046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t dataLoc  = URX_VAL(pat[instrOperandLoc]);
52056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize);
52066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t savedInputIdx = (int32_t)fp->fExtra[dataLoc];
52076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(savedInputIdx <= fp->fInputIdx);
52086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (savedInputIdx < fp->fInputIdx) {
52096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = opValue;                               // JMP
52106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
52116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no progress in loop.
52126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
52136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
52146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
52156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_START:
52176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
52186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Entering a lookahead block.
52196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save Stack Ptr, Input Pos.
52206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
52216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue]   = fStack->size();
52226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue+1] = fp->fInputIdx;
52236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveStart     = fLookStart;          // Set the match region change for
52246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit     = fLookLimit;          //   transparent bounds.
52256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
52266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
52276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_END:
52296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
52306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Leaving a look-ahead block.
52316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  restore Stack Ptr, Input Pos to positions they had on entry to block.
52326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
52336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stackSize = fStack->size();
52346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t newStackSize = (int32_t)fData[opValue];
52356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stackSize >= newStackSize);
52366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (stackSize > newStackSize) {
52376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Copy the current top frame back to the new (cut back) top frame.
52386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   This makes the capture groups from within the look-ahead
52396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   expression available.
52406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
52416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t i;
52426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    for (i=0; i<fFrameSize; i++) {
52436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        newFP[i] = ((int64_t *)fp)[i];
52446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
52456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)newFP;
52466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fStack->setSize(newStackSize);
52476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
52486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = fData[opValue+1];
52496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Restore the active region bounds in the input string; they may have
52516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    been changed because of transparent bounds on a Region.
52526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveStart = fRegionStart;
52536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit = fRegionLimit;
52546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
52556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
52566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR_I:
52586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fp->fInputIdx < fActiveLimit) {
52596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 c;
52606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
52616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
52626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
52636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
52646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
52656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fHitEnd = TRUE;
52666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
52676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
52686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
52696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_I:
52716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Case-insensitive test input against a literal string.
52726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Strings require two slots in the compiled pattern, one for the
52736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   offset to the string text, and one for the length.
52746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   The compiled string has already been case folded.
52756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
52766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UChar *patternString = litText + opValue;
52776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op      = (int32_t)pat[fp->fPatIdx];
52796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx++;
52806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                opType  = URX_TYPE(op);
52816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                opValue = URX_VAL(op);
52826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opType == URX_STRING_LEN);
52836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t patternStringLen = opValue;  // Length of the string from the pattern.
52846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
52856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32      cText;
52866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32      cPattern;
52876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UBool        success = TRUE;
52886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t      patternStringIdx  = 0;
52896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                CaseFoldingUCharIterator inputIterator(inputBuf, fp->fInputIdx, fActiveLimit);
52906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                while (patternStringIdx < patternStringLen) {
52916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U16_NEXT(patternString, patternStringIdx, patternStringLen, cPattern);
52926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cText = inputIterator.next();
52936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (cText != cPattern) {
52946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        success = FALSE;
52956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (cText == U_SENTINEL) {
52966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            fHitEnd = TRUE;
52976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
52986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
52996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
53006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
53016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (inputIterator.inExpansion()) {
53026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    success = FALSE;
53036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
53046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (success) {
53066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx = inputIterator.getIndex();
53076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
53086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
53096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
53106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
53116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
53126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_START:
53146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
53156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Entering a look-behind block.
53166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save Stack Ptr, Input Pos.
53176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   TODO:  implement transparent bounds.  Ticket #6067
53186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
53196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue]   = fStack->size();
53206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue+1] = fp->fInputIdx;
53216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Init the variable containing the start index for attempted matches.
53226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue+2] = -1;
53236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save input string length, then reset to pin any matches to end at
53246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   the current position.
53256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fData[opValue+3] = fActiveLimit;
53266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit     = fp->fInputIdx;
53276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
53286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
53296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_CONT:
53326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
53336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Positive Look-Behind, at top of loop checking for matches of LB expression
53346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    at all possible input starting positions.
53356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetch the min and max possible match lengths.  They are the operands
53376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   of this op in the pattern.
53386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minML = (int32_t)pat[fp->fPatIdx++];
53396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxML = (int32_t)pat[fp->fPatIdx++];
53406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minML <= maxML);
53416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minML >= 0);
53426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetch (from data) the last input index where a match was attempted.
53446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
53456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t  *lbStartIdx = &fData[opValue+2];
53466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*lbStartIdx < 0) {
53476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // First time through loop.
53486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *lbStartIdx = fp->fInputIdx - minML;
53496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
53506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // 2nd through nth time through the loop.
53516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Back up start position for match by one.
53526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (*lbStartIdx == 0) {
53536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (*lbStartIdx)--;
53546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
53556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        U16_BACK_1(inputBuf, 0, *lbStartIdx);
53566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
53576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
53586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
53606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We have tried all potential match starting points without
53616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  getting a match.  Backtrack out, and out of the
53626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   Look Behind altogether.
53636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
53646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int64_t restoreInputLen = fData[opValue+3];
53656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(restoreInputLen >= fActiveLimit);
53666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(restoreInputLen <= fInputLength);
53676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fActiveLimit = restoreInputLen;
53686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
53696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
53706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
53726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      (successful match will fall off the end of the loop.)
53736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx-3, status);
53746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx =  *lbStartIdx;
53756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
53766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
53776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_END:
53796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // End of a look-behind block, after a successful match.
53806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
53816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
53826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx != fActiveLimit) {
53836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  The look-behind expression matched, but the match did not
53846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //    extend all the way to the point that we are looking behind from.
53856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  FAIL out of here, which will take us back to the LB_CONT, which
53866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //     will retry the match starting at another position or fail
53876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //     the look-behind altogether, whichever is appropriate.
53886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
53896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
53906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
53916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
53926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Look-behind match is good.  Restore the original input string length,
53936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   which had been truncated to pin the end of the lookbehind match to the
53946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   position being looked-behind.
53956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t originalInputLen = fData[opValue+3];
53966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(originalInputLen >= fActiveLimit);
53976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(originalInputLen <= fInputLength);
53986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit = originalInputLen;
53996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
54006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
54016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_CONT:
54046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
54056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Negative Look-Behind, at top of loop checking for matches of LB expression
54066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    at all possible input starting positions.
54076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetch the extra parameters of this op.
54096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minML       = (int32_t)pat[fp->fPatIdx++];
54106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxML       = (int32_t)pat[fp->fPatIdx++];
54116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t continueLoc = (int32_t)pat[fp->fPatIdx++];
54126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                continueLoc = URX_VAL(continueLoc);
54136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minML <= maxML);
54146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(minML >= 0);
54156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(continueLoc > fp->fPatIdx);
54166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetch (from data) the last input index where a match was attempted.
54186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
54196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t  *lbStartIdx = &fData[opValue+2];
54206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*lbStartIdx < 0) {
54216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // First time through loop.
54226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *lbStartIdx = fp->fInputIdx - minML;
54236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
54246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // 2nd through nth time through the loop.
54256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Back up start position for match by one.
54266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (*lbStartIdx == 0) {
54276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (*lbStartIdx)--;   // Because U16_BACK is unsafe starting at 0.
54286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
54296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        U16_BACK_1(inputBuf, 0, *lbStartIdx);
54306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
54316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
54326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
54346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We have tried all potential match starting points without
54356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  getting a match, which means that the negative lookbehind as
54366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  a whole has succeeded.  Jump forward to the continue location
54376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int64_t restoreInputLen = fData[opValue+3];
54386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(restoreInputLen >= fActiveLimit);
54396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(restoreInputLen <= fInputLength);
54406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fActiveLimit = restoreInputLen;
54416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx = continueLoc;
54426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
54436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
54446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    Save state to this URX_LBN_CONT op, so failure to match will repeat the loop.
54466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      (successful match will cause a FAIL out of the loop altogether.)
54476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx-4, status);
54486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx =  *lbStartIdx;
54496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
54506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
54516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_END:
54536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // End of a negative look-behind block, after a successful match.
54546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
54556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
54566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (fp->fInputIdx != fActiveLimit) {
54576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  The look-behind expression matched, but the match did not
54586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //    extend all the way to the point that we are looking behind from.
54596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  FAIL out of here, which will take us back to the LBN_CONT, which
54606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //     will retry the match starting at another position or succeed
54616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //     the look-behind altogether, whichever is appropriate.
54626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
54636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
54646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
54656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Look-behind expression matched, which means look-behind test as
54676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   a whole Fails
54686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Restore the original input string length, which had been truncated
54706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   in order to pin the end of the lookbehind match
54716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   to the position being looked-behind.
54726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int64_t originalInputLen = fData[opValue+3];
54736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(originalInputLen >= fActiveLimit);
54746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(originalInputLen <= fInputLength);
54756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fActiveLimit = originalInputLen;
54766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Restore original stack position, discarding any state saved
54786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   by the successful pattern match.
54796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
54806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t newStackSize = (int32_t)fData[opValue];
54816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(fStack->size() > newStackSize);
54826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fStack->setSize(newStackSize);
54836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  FAIL, which will take control back to someplace
54856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  prior to entering the look-behind test.
54866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
54876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
54886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
54896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
54916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_SR_I:
54926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Loop Initialization for the optimized implementation of
54936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //     [some character set]*
54946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   This op scans through all matching input.
54956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
54966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
54976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue > 0 && opValue < sets->size());
54986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                Regex8BitSet *s8 = &fPattern->fSets8[opValue];
54996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeSet   *s  = (UnicodeSet *)sets->elementAt(opValue);
55006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
55016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Loop through input, until either the input is exhausted or
55026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   we reach a character that is not a member of the set.
55036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t ix = (int32_t)fp->fInputIdx;
55046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
55056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (ix >= fActiveLimit) {
55066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fHitEnd = TRUE;
55076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
55086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
55096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32   c;
55106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U16_NEXT(inputBuf, ix, fActiveLimit, c);
55116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (c<256) {
55126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (s8->contains(c) == FALSE) {
55136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            U16_BACK_1(inputBuf, 0, ix);
55146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
55156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
55166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
55176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (s->contains(c) == FALSE) {
55186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            U16_BACK_1(inputBuf, 0, ix);
55196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
55206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
55216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
55226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
55236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
55246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If there were no matching characters, skip over the loop altogether.
55256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   The loop doesn't run at all, a * op always succeeds.
55266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (ix == fp->fInputIdx) {
55276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
55286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
55296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
55306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
55316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
55326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   must follow.  Its operand is the stack location
55336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   that holds the starting input index for the match of this [set]*
55346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
55356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
55366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stackLoc = URX_VAL(loopcOp);
55376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
55386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[stackLoc] = fp->fInputIdx;
55396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = ix;
55406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
55416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save State to the URX_LOOP_C op that follows this one,
55426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   so that match failures in the following code will return to there.
55436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
55446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx, status);
55456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx++;
55466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
55476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
55486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
55496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
55506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_DOT_I:
55516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Loop Initialization for the optimized implementation of .*
55526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   This op scans through all remaining input.
55536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
55546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
55556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Loop through input until the input is exhausted (we reach an end-of-line)
55566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // In DOTALL mode, we can just go straight to the end of the input.
55576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t ix;
55586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if ((opValue & 1) == 1) {
55596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Dot-matches-All mode.  Jump straight to the end of the string.
55606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ix = (int32_t)fActiveLimit;
55616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fHitEnd = TRUE;
55626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
55636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // NOT DOT ALL mode.  Line endings do not match '.'
55646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Scan forward until a line ending or end of input.
55656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ix = (int32_t)fp->fInputIdx;
55666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    for (;;) {
55676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (ix >= fActiveLimit) {
55686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            fHitEnd = TRUE;
55696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
55706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
55716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UChar32   c;
55726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        U16_NEXT(inputBuf, ix, fActiveLimit, c);   // c = inputBuf[ix++]
55736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if ((c & 0x7f) <= 0x29) {          // Fast filter of non-new-line-s
55746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if ((c == 0x0a) ||             //  0x0a is newline in both modes.
55756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                (((opValue & 2) == 0) &&    // IF not UNIX_LINES mode
55766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                   ((c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029))) {
55776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                //  char is a line ending.  Put the input pos back to the
55786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                //    line ending char, and exit the scanning loop.
55796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                U16_BACK_1(inputBuf, 0, ix);
55806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                break;
55816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
55826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
55836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
55846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
55856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
55866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If there were no matching characters, skip over the loop altogether.
55876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   The loop doesn't run at all, a * op always succeeds.
55886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (ix == fp->fInputIdx) {
55896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
55906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
55916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
55926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
55936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
55946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   must follow.  Its operand is the stack location
55956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   that holds the starting input index for the match of this .*
55966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
55976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
55986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stackLoc = URX_VAL(loopcOp);
55996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
56006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fExtra[stackLoc] = fp->fInputIdx;
56016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fInputIdx = ix;
56026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Save State to the URX_LOOP_C op that follows this one,
56046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   so that match failures in the following code will return to there.
56056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
56066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx, status);
56076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp->fPatIdx++;
56086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
56096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
56106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_C:
56136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
56146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(opValue>=0 && opValue<fFrameSize);
56156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                backSearchIndex = (int32_t)fp->fExtra[opValue];
56166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(backSearchIndex <= fp->fInputIdx);
56176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (backSearchIndex == fp->fInputIdx) {
56186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // We've backed up the input idx to the point that the loop started.
56196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // The loop is done.  Leave here without saving state.
56206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  Subsequent failures won't come back here.
56216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
56226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
56236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Set up for the next iteration of the loop, with input index
56246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   backed up by one from the last time through,
56256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   and a state save to this instruction in case the following code fails again.
56266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   (We're going backwards because this loop emulates stack unwinding, not
56276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    the initial scan forward.)
56286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(fp->fInputIdx > 0);
56296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 prevC;
56306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_PREV(inputBuf, 0, fp->fInputIdx, prevC); // !!!: should this 0 be one of f*Limit?
56316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (prevC == 0x0a &&
56336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fp->fInputIdx > backSearchIndex &&
56346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    inputBuf[fp->fInputIdx-1] == 0x0d) {
56356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t prevOp = (int32_t)pat[fp->fPatIdx-2];
56366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) {
56376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // .*, stepping back over CRLF pair.
56386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        U16_BACK_1(inputBuf, 0, fp->fInputIdx);
56396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
56406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
56416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fp = StateSave(fp, fp->fPatIdx-1, status);
56446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
56456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
56466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        default:
56506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Trouble.  The compiled pattern contains an entry with an
56516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //           unrecognized type tag.
56526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(FALSE);
56536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
56546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
56566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMatch = FALSE;
56576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
56586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
56596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
56606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbreakFromLoop:
56626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatch = isMatch;
56636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (isMatch) {
56646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLastMatchEnd = fMatchEnd;
56656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fMatchStart   = startIdx;
56666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fMatchEnd     = fp->fInputIdx;
56676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fTraceDebug) {
56686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_RUN_DEBUG_PRINTF(("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd));
56696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
56706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
56716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else
56726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
56736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fTraceDebug) {
56746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
56756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
56766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
56776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fFrame = fp;                // The active stack frame when the engine stopped.
56796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Contains the capture group results that we need to
56806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    access later.
56816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return;
56836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
56846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// RTTI boilerplate for RegexMatcher, generated by macro expansion.
// NOTE(review): the macro is defined outside this file (presumably in unicode/uobject.h)
//   and, judging by its name, supplies the class-ID functions for RegexMatcher
//   (getStaticClassID()/getDynamicClassID()) -- confirm against that header.
56866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher)
56876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
56896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
56906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
5691