//
//  file:  regexcmp.cpp
//
//  Copyright (C) 2002-2013 International Business Machines Corporation and others.
//  All Rights Reserved.
//
//  This file contains the ICU regular expression compiler, which is responsible
//  for processing a regular expression pattern into the compiled form that
//  is used by the match finding engine.
//
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_REGULAR_EXPRESSIONS
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchriter.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/parsepos.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/parseerr.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/regex.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf.h"
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "patternprops.h"
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "putilimp.h"
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h"
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvectr32.h"
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvectr64.h"
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h"
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucln_in.h"
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uinvchar.h"
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regeximp.h"
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regexcst.h"   // Contains state table for the regex pattern parser.
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        //   generated by a Perl script.
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regexcmp.h"
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regexst.h"
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regextxt.h"
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//------------------------------------------------------------------------------
//
//  Constructor.
//
//------------------------------------------------------------------------------
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexCompile::RegexCompile(RegexPattern *rxp, UErrorCode &status) :
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org   fParenStack(status), fSetStack(status), fSetOpStack(status)
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Lazy init of all shared global sets (needed for init()'s empty text)
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexStaticSets::initGlobals(&status);
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStatus           = &status;
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat            = rxp;
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fScanIndex        = 0;
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLastChar         = -1;
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPeekChar         = -1;
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLineNum          = 1;
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fCharNum          = 0;
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fQuoteMode        = FALSE;
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fInBackslashQuote = FALSE;
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fModeFlags        = fRXPat->fFlags | 0x80000000;
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fEOLComments      = TRUE;
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchOpenParen   = -1;
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchCloseParen  = -1;
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(status) && U_FAILURE(rxp->fDeferredStatus)) {
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = rxp->fDeferredStatus;
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chAmp       = 0x26;      // '&'
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chDash      = 0x2d;      // '-'
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//------------------------------------------------------------------------------
//
//  Destructor
//
//------------------------------------------------------------------------------
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexCompile::~RegexCompile() {
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline void addCategory(UnicodeSet *set, int32_t value, UErrorCode& ec) {
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    set->addAll(UnicodeSet().applyIntPropertyValue(UCHAR_GENERAL_CATEGORY_MASK, value, ec));
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//------------------------------------------------------------------------------
//
//  Compile regex pattern.   The state machine for regexp pattern parsing is here.
//                           The state tables are hand-written in the file regexcst.txt,
//                           and converted to the form used here by a perl
//                           script regexcst.pl
//
//------------------------------------------------------------------------------
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid    RegexCompile::compile(
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         const UnicodeString &pat,   // Source pat to be compiled.
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UParseError &pp,            // Error position info
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UErrorCode &e)              // Error Code
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fPatternString = new UnicodeString(pat);
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UText patternText = UTEXT_INITIALIZER;
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utext_openConstUnicodeString(&patternText, fRXPat->fPatternString, &e);
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(e)) {
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        compile(&patternText, pp, e);
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_close(&patternText);
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//
//   compile, UText mode
//     All the work is actually done here.
//
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid    RegexCompile::compile(
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UText *pat,                 // Source pat to be compiled.
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UParseError &pp,            // Error position info
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UErrorCode &e)              // Error Code
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStatus             = &e;
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fParseErr           = &pp;
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStackPtr           = 0;
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fStack[fStackPtr]   = 0;
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // There should be no pattern stuff in the RegexPattern object.  They can not be reused.
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(fRXPat->fPattern == NULL || utext_nativeLength(fRXPat->fPattern) == 0);
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Prepare the RegexPattern object to receive the compiled pattern.
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fPattern        = utext_clone(fRXPat->fPattern, pat, FALSE, TRUE, fStatus);
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fStaticSets     = RegexStaticSets::gStaticSets->fPropSets;
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fStaticSets8    = RegexStaticSets::gStaticSets->fPropSets8;
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Initialize the pattern scanning state machine
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fPatternLength = utext_nativeLength(pat);
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t                state = 1;
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const RegexTableEl      *tableEl;
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // UREGEX_LITERAL force entire pattern to be treated as a literal string.
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fModeFlags & UREGEX_LITERAL) {
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fQuoteMode = TRUE;
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextChar(fC);                        // Fetch the first char from the pattern string.
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Main loop for the regex pattern parsing state machine.
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Runs once per state transition.
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Each time through optionally performs, depending on the state table,
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //      - an advance to the the next pattern char
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //      - an action to be performed.
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //      - pushing or popping a state to/from the local state return stack.
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   file regexcst.txt is the source for the state table.  The logic behind
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     recongizing the pattern syntax is there, not here.
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Bail out if anything has gone wrong.
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Regex pattern parsing stops on the first error encountered.
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(*fStatus)) {
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(state != 0);
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Find the state table element that matches the input char from the pattern, or the
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    class of the input character.  Start with the first table row for this
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    state, then linearly scan forward until we find a row that matches the
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    character.  The last row for each state always matches all characters, so
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    the search will stop there, if not before.
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        tableEl = &gRuleParseStateTable[state];
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_SCAN_DEBUG_PRINTF(("char, line, col = (\'%c\', %d, %d)    state=%s ",
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fC.fChar, fLineNum, fCharNum, RegexStateNames[state]));
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (;;) {    // loop through table rows belonging to this state, looking for one
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      //   that matches the current input char.
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            REGEX_SCAN_DEBUG_PRINTF(("."));
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (tableEl->fCharClass < 127 && fC.fQuoted == FALSE &&   tableEl->fCharClass == fC.fChar) {
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Table row specified an individual character, not a set, and
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   the input character is not quoted, and
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   the input character matched it.
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (tableEl->fCharClass == 255) {
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Table row specified default, match anything character class.
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (tableEl->fCharClass == 254 && fC.fQuoted)  {
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Table row specified "quoted" and the char was quoted.
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (tableEl->fCharClass == 253 && fC.fChar == (UChar32)-1)  {
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Table row specified eof and we hit eof on the input.
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 &&   // Table specs a char class &&
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fC.fQuoted == FALSE &&                                       //   char is not escaped &&
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fC.fChar != (UChar32)-1) {                                   //   char is not EOF
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(tableEl->fCharClass <= 137);
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (RegexStaticSets::gStaticSets->fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) {
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Table row specified a character class, or set of characters,
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   and the current char matches it.
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // No match on this row, advance to the next  row for this state,
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            tableEl++;
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        REGEX_SCAN_DEBUG_PRINTF(("\n"));
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We've found the row of the state table that matches the current input
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   character from the rules string.
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Perform any action specified  by this row in the state table.
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (doParseActions(tableEl->fAction) == FALSE) {
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Break out of the state machine loop if the
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   the action signalled some kind of error, or
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   the action was to exit, occurs on normal end-of-rules-input.
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (tableEl->fPushState != 0) {
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fStackPtr++;
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fStackPtr >= kStackSize) {
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_REGEX_INTERNAL_ERROR);
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                REGEX_SCAN_DEBUG_PRINTF(("RegexCompile::parse() - state stack overflow.\n"));
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fStackPtr--;
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fStack[fStackPtr] = tableEl->fPushState;
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  NextChar.  This is where characters are actually fetched from the pattern.
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //             Happens under control of the 'n' tag in the state table.
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (tableEl->fNextChar) {
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            nextChar(fC);
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Get the next state from the table entry, or from the
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   state stack if the next state was specified as "pop".
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (tableEl->fNextState != 255) {
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            state = tableEl->fNextState;
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            state = fStack[fStackPtr];
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fStackPtr--;
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fStackPtr < 0) {
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // state stack underflow
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // This will occur if the user pattern has mis-matched parentheses,
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   with extra close parens.
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fStackPtr++;
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_REGEX_MISMATCHED_PAREN);
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Bail out if the pattern had errors.
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Set stack cleanup:  a successful compile would have left it empty,
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   but errors can leave temporary sets hanging around.
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while (!fSetStack.empty()) {
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete (UnicodeSet *)fSetStack.pop();
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The pattern has now been read and processed, and the compiled code generated.
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compute the number of digits requried for the largest capture group number.
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fMaxCaptureDigits = 1;
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  n = 10;
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  groupCount = fRXPat->fGroupMap->size();
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (n <= groupCount) {
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fMaxCaptureDigits++;
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        n *= 10;
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The pattern's fFrameSize so far has accumulated the requirements for
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   storage for capture parentheses, counters, etc. that are encountered
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   in the pattern.  Add space for the two variables that are always
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   present in the saved state:  the input string position (int64_t) and
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   the position in the compiled pattern.
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fFrameSize+=RESTACKFRAME_HDRCOUNT;
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Optimization pass 1: NOPs, back-references, and case-folding
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    stripNOPs();
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Get bounds for the minimum and maximum length of a string that this
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   pattern can match.  Used to avoid looking for matches in strings that
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   are too short.
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fMinMatchLen = minMatchLength(3, fRXPat->fCompiledPat->size()-1);
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Optimization pass 2: match start type
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    matchStartType();
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Set up fast latin-1 range sets
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t numSets = fRXPat->fSets->size();
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fSets8 = new Regex8BitSet[numSets];
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Null pointer check.
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fRXPat->fSets8 == NULL) {
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        e = *fStatus = U_MEMORY_ALLOCATION_ERROR;
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=0; i<numSets; i++) {
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeSet *s = (UnicodeSet *)fRXPat->fSets->elementAt(i);
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fSets8[i].init(s);
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  doParseActions       Do some action during regex pattern parsing.
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       Called by the parse state machine.
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       Generation of the match engine PCode happens here, or
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       in functions called from the parse actions defined here.
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexCompile::doParseActions(int32_t action)
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool   returnVal = TRUE;
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch ((Regex_PatternParseAction)action) {
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doPatStart:
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Start of pattern compiles to:
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //0   SAVE   2        Fall back to position of FAIL
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //1   jmp    3
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //2   FAIL            Stop if we ever reach here.
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //3   NOP             Dummy, so start of pattern looks the same as
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                    the start of an ( grouping.
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //4   NOP             Reserved, will be replaced by a save if there are
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                    OR | operators at the top level
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_STATE_SAVE, 2), *fStatus);
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_JMP,  3), *fStatus);
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_FAIL, 0), *fStatus);
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Standard open nonCapture paren action emits the two NOPs and
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   sets up the paren stack frame.
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        doParseActions(doOpenNonCaptureParen);
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doPatFinish:
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We've scanned to the end of the pattern
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  The end of pattern compiles to:
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //        URX_END
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    which will stop the runtime match engine.
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Encountering end of pattern also behaves like a close paren,
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   and forces fixups of the State Save at the beginning of the compiled pattern
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   and of any OR operations at the top level.
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        handleCloseParen();
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fParenStack.size() > 0) {
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Missing close paren in pattern.
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            error(U_REGEX_MISMATCHED_PAREN);
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // add the END operation to the compiled pattern.
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_END, 0), *fStatus);
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Terminate the pattern compilation state machine.
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        returnVal = FALSE;
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOrOperator:
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Scanning a '|', as in (A|B)
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Generate code for any pending literals preceding the '|'
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Insert a SAVE operation at the start of the pattern section preceding
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   this OR at this level.  This SAVE will branch the match forward
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   to the right hand side of the OR in the event that the left hand
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   side fails to match and backtracks.  Locate the position for the
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   save from the location on the top of the parentheses stack.
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t savePosition = fParenStack.popi();
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(savePosition);
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(op) == URX_NOP);  // original contents of reserved location
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1);
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op, savePosition);
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Append a JMP operation into the compiled pattern.  The operand for
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  the JMP will eventually be the location following the ')' for the
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  group.  This will be patched in later, when the ')' is encountered.
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_JMP, 0);
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Push the position of the newly added JMP op onto the parentheses stack.
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // This registers it for fixup when this block's close paren is encountered.
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Append a NOP to the compiled pattern.  This is the slot reserved
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   for a SAVE in the event that there is yet another '|' following
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   this one.
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOpenCaptureParen:
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Open Paren.
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Compile to a
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - NOP, which later may be replaced by a save-state if the
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //         parenthesized group gets a * quantifier, followed by
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - START_CAPTURE  n    where n is stack frame offset to the capture group variables.
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - NOP, which may later be replaced by a save-state if there
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //             is an '|' alternation within the parens.
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    Each capture group gets three slots in the save stack frame:
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //         0: Capture Group start position (in input string being matched.)
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //         1: Capture Group end position.
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //         2: Start of Match-in-progress.
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    The first two locations are for a completed capture group, and are
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     referred to by back references and the like.
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    The third location stores the capture start position when a START_CAPTURE is
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      encountered.  This will be promoted to a completed capture when (and if) the corresponding
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      END_CAPTURE is encountered.
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals();
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  varsLoc    = fRXPat->fFrameSize;    // Reserve three slots in match stack frame.
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fFrameSize += 3;
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  cop        = URX_BUILD(URX_START_CAPTURE, varsLoc);
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(cop, *fStatus);
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // On the Parentheses stack, start a new frame and add the positions
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of the two NOPs.  Depending on what follows in the pattern, the
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   NOPs may be changed to SAVE_STATE or JMP ops, with a target
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   address of the end of the parenthesized group.
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(capturing, *fStatus);                        // Frame type.
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus);   // The first  NOP location
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP loc
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Save the mapping from group number to stack frame variable position.
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fGroupMap->addElement(varsLoc, *fStatus);
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         break;
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOpenNonCaptureParen:
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Open non-capturing (grouping only) Paren.
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Compile to a
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - NOP, which later may be replaced by a save-state if the
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //         parenthesized group gets a * quantifier, followed by
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - NOP, which may later be replaced by a save-state if there
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //             is an '|' alternation within the parens.
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals();
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // On the Parentheses stack, start a new frame and add the positions
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of the two NOPs.
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(plain,      *fStatus);                       // Begin a new frame.
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus);   // The first  NOP location
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP loc
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         break;
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOpenAtomicParen:
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Open Atomic Paren.  (?>
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Compile to a
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - NOP, which later may be replaced if the parenthesized group
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //         has a quantifier, followed by
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - STO_SP  save state stack position, so it can be restored at the ")"
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - NOP, which may later be replaced by a save-state if there
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //             is an '|' alternation within the parens.
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals();
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  varLoc    = fRXPat->fDataSize;    // Reserve a data location for saving the
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize += 1;                    //  state stack ptr.
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  stoOp     = URX_BUILD(URX_STO_SP, varLoc);
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(stoOp, *fStatus);
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // On the Parentheses stack, start a new frame and add the positions
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of the two NOPs.  Depending on what follows in the pattern, the
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   NOPs may be changed to SAVE_STATE or JMP ops, with a target
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   address of the end of the parenthesized group.
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(atomic, *fStatus);                           // Frame type.
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus);   // The first NOP
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOpenLookAhead:
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Positive Look-ahead   (?=  stuff  )
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Note:   Addition of transparent input regions, with the need to
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //           restore the original regions when failing out of a lookahead
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //           block, complicated this sequence.  Some combined opcodes
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //           might make sense - or might not, lookaheads aren't that common.
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      Caution:  min match length optimization knows about this
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //               sequence; don't change without making updates there too.
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Compiles to
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    1    START_LA     dataLoc     Saves SP, Input Pos
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    2.   STATE_SAVE   4            on failure of lookahead, goto 4
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    3    JMP          6           continue ...
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    4.   LA_END                   Look Ahead failed.  Restore regions.
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    5.   BACKTRACK                and back track again.
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    6.   NOP              reserved for use by quantifiers on the block.
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                          Look-ahead can't have quantifiers, but paren stack
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                             compile time conventions require the slot anyhow.
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    7.   NOP              may be replaced if there are '|' ops in the block.
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    8.     code for parenthesized stuff.
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    9.   LA_END
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Two data slots are reserved, for saving the stack ptr and the input position.
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals();
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dataLoc = fRXPat->fDataSize;
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize += 2;
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = URX_BUILD(URX_LA_START, dataLoc);
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2);
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_LA_END, dataLoc);
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_BACKTRACK, 0);
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_NOP, 0);
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // On the Parentheses stack, start a new frame and add the positions
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of the NOPs.
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(lookAhead, *fStatus);                        // Frame type.
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus);   // The first  NOP location
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP location
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOpenLookAheadNeg:
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Negated Lookahead.   (?! stuff )
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Compiles to
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    1.    START_LA    dataloc
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    2.    SAVE_STATE  7         // Fail within look-ahead block restores to this state,
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                                //   which continues with the match.
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    3.    NOP                   // Std. Open Paren sequence, for possible '|'
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    4.       code for parenthesized stuff.
5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    5.    END_LA                // Cut back stack, remove saved state from step 2.
6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    6.    BACKTRACK             // code in block succeeded, so neg. lookahead fails.
6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    7.    END_LA                // Restore match region, in case look-ahead was using
6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                                        an alternate (transparent) region.
6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals();
6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dataLoc = fRXPat->fDataSize;
6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize += 2;
6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = URX_BUILD(URX_LA_START, dataLoc);
6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_STATE_SAVE, 0);    // dest address will be patched later.
6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_NOP, 0);
6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // On the Parentheses stack, start a new frame and add the positions
6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of the StateSave and NOP.
6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(negLookAhead, *fStatus);                    // Frame type
6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus);   // The STATE_SAVE location
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP location
6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Instructions #5 - #7 will be added when the ')' is encountered.
6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOpenLookBehind:
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   Compile a (?<= look-behind open paren.
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //          Compiles to
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              0       URX_LB_START     dataLoc
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              1       URX_LB_CONT      dataLoc
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              2                        MinMatchLen
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              3                        MaxMatchLen
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              4       URX_NOP          Standard '(' boilerplate.
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              5       URX_NOP          Reserved slot for use with '|' ops within (block).
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              6         <code for LookBehind expression>
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              7       URX_LB_END       dataLoc    # Check match len, restore input  len
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              8       URX_LA_END       dataLoc    # Restore stack, input pos
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //          Allocate a block of matcher data, to contain (when running a match)
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              0:    Stack ptr on entry
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              1:    Input Index on entry
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              2:    Start index of the current match attempt.
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              3:    Original Input String len.
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Generate match code for any pending literals.
6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals();
6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Allocate data space
6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dataLoc = fRXPat->fDataSize;
6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize += 4;
6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit URX_LB_START
6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = URX_BUILD(URX_LB_START, dataLoc);
6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit URX_LB_CONT
6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_LB_CONT, dataLoc);
6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(0,  *fStatus);    // MinMatchLength.  To be filled later.
6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(0,  *fStatus);    // MaxMatchLength.  To be filled later.
6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the NOP
6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_NOP, 0);
6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // On the Parentheses stack, start a new frame and add the positions
6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of the two NOPs.
6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(lookBehind, *fStatus);                       // Frame type
6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus);   // The first NOP location
6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The 2nd   NOP location
6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The final two instructions will be added when the ')' is encountered.
6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOpenLookBehindNeg:
6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   Compile a (?<! negated look-behind open paren.
6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //          Compiles to
6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              0       URX_LB_START     dataLoc    # Save entry stack, input len
6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              1       URX_LBN_CONT     dataLoc    # Iterate possible match positions
6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              2                        MinMatchLen
6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              3                        MaxMatchLen
6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              4                        continueLoc (9)
6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              5       URX_NOP          Standard '(' boilerplate.
6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              6       URX_NOP          Reserved slot for use with '|' ops within (block).
6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              7         <code for LookBehind expression>
6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              8       URX_LBN_END      dataLoc    # Check match len, cause a FAIL
6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              9       ...
6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //          Allocate a block of matcher data, to contain (when running a match)
6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              0:    Stack ptr on entry
7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              1:    Input Index on entry
7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              2:    Start index of the current match attempt.
7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //              3:    Original Input String len.
7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Generate match code for any pending literals.
7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals();
7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Allocate data space
7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dataLoc = fRXPat->fDataSize;
7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize += 4;
7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit URX_LB_START
7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = URX_BUILD(URX_LB_START, dataLoc);
7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit URX_LBN_CONT
7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_LBN_CONT, dataLoc);
7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(0,  *fStatus);    // MinMatchLength.  To be filled later.
7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(0,  *fStatus);    // MaxMatchLength.  To be filled later.
7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(0,  *fStatus);    // Continue Loc.    To be filled later.
7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the NOP
7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_NOP, 0);
7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // On the Parentheses stack, start a new frame and add the positions
7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of the two NOPs.
7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(lookBehindN, *fStatus);                      // Frame type
7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus);   // The first NOP location
7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The 2nd   NOP location
7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The final instruction (URX_LBN_END, per the layout above) will be added when the ')' is encountered.
7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doConditionalExpr:
7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Conditionals such as (?(1)a:b)
7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doPerlInline:
7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Perl inline-conditionals.  (?{perl code}a|b) We're not perl, no way to do them.
7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_UNIMPLEMENTED);
7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doCloseParen:
7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        handleCloseParen();
7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fParenStack.size() <= 0) {
7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  Extra close paren, or missing open paren.
7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            error(U_REGEX_MISMATCHED_PAREN);
7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doNOP:
7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBadOpenParenType:
7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doRuleError:
7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_RULE_SYNTAX);
7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doMismatchedParenErr:
7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_MISMATCHED_PAREN);
7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doPlus:
7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Normal '+'  compiles to
7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     1.   stuff to be repeated  (already built)
7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     2.   jmp-sav 1
7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     3.   ...
7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Or, if the item to be repeated can match a zero length string,
7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     1.   STO_INP_LOC  data-loc
7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     2.      body of stuff to be repeated
7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     3.   JMP_SAV_X    2
7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     4.   ...
7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Or, if the item to be repeated is simple
7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     1.   Item to be repeated.
7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     2.   LOOP_SR_I    set number  (assuming repeated item is a set ref)
7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     3.   LOOP_C       stack location
7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  topLoc = blockTopLoc(FALSE);        // location of item #1
7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  frameLoc;
7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Check for simple constructs, which may get special optimized code.
7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (topLoc == fRXPat->fCompiledPat->size() - 1) {
7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(topLoc);
7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (URX_TYPE(repeatedOp) == URX_SETREF) {
7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Emit optimized code for [char set]+
7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp));
7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    frameLoc = fRXPat->fFrameSize;
7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fFrameSize++;
7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (URX_TYPE(repeatedOp) == URX_DOTANY ||
8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    URX_TYPE(repeatedOp) == URX_DOTANY_ALL ||
8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) {
8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Emit Optimized code for .+ operations.
8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0);
8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // URX_LOOP_DOT_I operand is a flag indicating ". matches any" mode.
8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        loopOpI |= 1;
8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (fModeFlags & UREGEX_UNIX_LINES) {
8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        loopOpI |= 2;
8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fCompiledPat->addElement(loopOpI, *fStatus);
8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    frameLoc = fRXPat->fFrameSize;
8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fFrameSize++;
8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // General case.
8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Check for minimum match length of zero, which requires
8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    extra loop-breaking code.
8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (minMatchLength(topLoc, fRXPat->fCompiledPat->size()-1) == 0) {
8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Zero length match is possible.
8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Emit the code sequence that can handle it.
8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                insertOp(topLoc);
8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                frameLoc =  fRXPat->fFrameSize;
8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fFrameSize++;
8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t op = URX_BUILD(URX_STO_INP_LOC, frameLoc);
8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fCompiledPat->setElementAt(op, topLoc);
8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_BUILD(URX_JMP_SAV_X, topLoc+1);
8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fCompiledPat->addElement(op, *fStatus);
8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Simpler code when the repeated body must match something non-empty
8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  jmpOp  = URX_BUILD(URX_JMP_SAV, topLoc);
8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doNGPlus:
8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Non-greedy '+?'  compiles to
8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     1.   stuff to be repeated  (already built)
8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     2.   state-save  1
8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     3.   ...
8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t topLoc      = blockTopLoc(FALSE);
8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, topLoc);
8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus);
8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doOpt:
8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Normal (greedy) ? quantifier.
8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Compiles to
8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     1. state save 3
8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     2.    body of optional block
8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     3. ...
8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Insert the state save into the compiled pattern, and we're done.
8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   saveStateLoc = blockTopLoc(TRUE);
8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   saveStateOp  = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size());
8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doNGOpt:
8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Non-greedy ?? quantifier
8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   compiles to
8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    1.  jmp   4
8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    2.     body of optional block
8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    3   jmp   5
8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    4.  state save 2
8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    5    ...
8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  This code is less than ideal, with two jmps instead of one, because we can only
8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  insert one instruction at the top of the block being iterated.
8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  jmp1_loc = blockTopLoc(TRUE);
8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  jmp2_loc = fRXPat->fCompiledPat->size();
8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  jmp1_op  = URX_BUILD(URX_JMP, jmp2_loc+1);
8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(jmp1_op, jmp1_loc);
8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  jmp2_op  = URX_BUILD(URX_JMP, jmp2_loc+2);
8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(jmp2_op, *fStatus);
8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  save_op  = URX_BUILD(URX_STATE_SAVE, jmp1_loc+1);
8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(save_op, *fStatus);
8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doStar:
9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Normal (greedy) * quantifier.
9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Compiles to
9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       1.   STATE_SAVE   4
9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       2.      body of stuff being iterated over
9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       3.   JMP_SAV      2
9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       4.   ...
9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Or, if the body is a simple [Set],
9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       1.   LOOP_SR_I    set number
9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       2.   LOOP_C       stack location
9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       ...
9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Or if this is a .*
9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       1.   LOOP_DOT_I    (. matches all mode flag)
9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       2.   LOOP_C        stack location
9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Or, if the body can match a zero-length string, to inhibit infinite loops,
9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       1.   STATE_SAVE   5
9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       2.   STO_INP_LOC  data-loc
9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       3.      body of stuff
9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       4.   JMP_SAV_X    2
9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       5.   ...
9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // location of item #1, the STATE_SAVE
9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   topLoc = blockTopLoc(FALSE);
9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   dataLoc = -1;
9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Check for simple *, where the construct being repeated
9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   compiled to single opcode, and might be optimizable.
9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (topLoc == fRXPat->fCompiledPat->size() - 1) {
9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(topLoc);
9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (URX_TYPE(repeatedOp) == URX_SETREF) {
9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Emit optimized code for a [char set]*
9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedOp));
9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    dataLoc = fRXPat->fFrameSize;
9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fFrameSize++;
9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (URX_TYPE(repeatedOp) == URX_DOTANY ||
9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    URX_TYPE(repeatedOp) == URX_DOTANY_ALL ||
9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) {
9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Emit Optimized code for .* operations.
9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0);
9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // URX_LOOP_DOT_I operand is a flag indicating . matches any mode.
9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        loopOpI |= 1;
9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if ((fModeFlags & UREGEX_UNIX_LINES) != 0) {
9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        loopOpI |= 2;
9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    dataLoc = fRXPat->fFrameSize;
9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fFrameSize++;
9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit general case code for this *
9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The optimizations did not apply.
9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   saveStateLoc = blockTopLoc(TRUE);
9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   jmpOp        = URX_BUILD(URX_JMP_SAV, saveStateLoc+1);
9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Check for minimum match length of zero, which requires
9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    extra loop-breaking code.
9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (minMatchLength(saveStateLoc, fRXPat->fCompiledPat->size()-1) == 0) {
9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                insertOp(saveStateLoc);
9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                dataLoc =  fRXPat->fFrameSize;
9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fFrameSize++;
9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t op = URX_BUILD(URX_STO_INP_LOC, dataLoc);
9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fCompiledPat->setElementAt(op, saveStateLoc+1);
9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                jmpOp      = URX_BUILD(URX_JMP_SAV_X, saveStateLoc+2);
9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Locate the position in the compiled pattern where the match will continue
9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   after completing the *.   (4 or 5 in the comment above)
9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Put together the save state op and store it into the compiled code.
9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc);
9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Append the URX_JMP_SAV or URX_JMP_SAV_X operation to the compiled pattern.
9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doNGStar:
10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Non-greedy *? quantifier
10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // compiles to
10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     1.   JMP    3
10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     2.      body of stuff being iterated over
10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     3.   STATE_SAVE  2
10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     4    ...
10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t     jmpLoc  = blockTopLoc(TRUE);                   // loc  1.
10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t     saveLoc = fRXPat->fCompiledPat->size();        // loc  3.
10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t     jmpOp   = URX_BUILD(URX_JMP, saveLoc);
10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t     stateSaveOp = URX_BUILD(URX_STATE_SAVE, jmpLoc+1);
10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(jmpOp, jmpLoc);
10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(stateSaveOp, *fStatus);
10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doIntervalInit:
10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The '{' opening an interval quantifier was just scanned.
10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Init the counter variables that will accumulate the values as the digits
10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    are scanned.
10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fIntervalLow = 0;
10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fIntervalUpper = -1;
10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doIntevalLowerDigit:
10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Scanned a digit from the lower value of an {lower,upper} interval
10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t digitValue = u_charDigitValue(fC.fChar);
10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(digitValue >= 0);
10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fIntervalLow = fIntervalLow*10 + digitValue;
10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fIntervalLow < 0) {
10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_REGEX_NUMBER_TOO_BIG);
10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doIntervalUpperDigit:
10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Scanned a digit from the upper value of an {lower,upper} interval
10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fIntervalUpper < 0) {
10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fIntervalUpper = 0;
10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t digitValue = u_charDigitValue(fC.fChar);
10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(digitValue >= 0);
10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fIntervalUpper = fIntervalUpper*10 + digitValue;
10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fIntervalUpper < 0) {
10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_REGEX_NUMBER_TOO_BIG);
10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doIntervalSame:
10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Scanned a single value interval like {27}.  Upper = Lower.
10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fIntervalUpper = fIntervalLow;
10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doInterval:
10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Finished scanning a normal {lower,upper} interval.  Generate the code for it.
10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (compileInlineInterval() == FALSE) {
10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            compileInterval(URX_CTR_INIT, URX_CTR_LOOP);
10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doPossessiveInterval:
10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Finished scanning a Possessive {lower,upper}+ interval.  Generate the code for it.
10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Remember the loc for the top of the block being looped over.
10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   (Can not reserve a slot in the compiled pattern at this time, because
10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    compileInterval needs to reserve also, and blockTopLoc can only reserve
10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //    once per block.)
10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t topLoc = blockTopLoc(FALSE);
10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Produce normal looping code.
10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            compileInterval(URX_CTR_INIT, URX_CTR_LOOP);
10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Surround the just-emitted normal looping code with a STO_SP ... LD_SP
10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  just as if the loop was enclosed in atomic parentheses.
10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // First the STO_SP before the start of the loop
10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            insertOp(topLoc);
10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  varLoc    = fRXPat->fDataSize;    // Reserve a data location for saving the
10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize += 1;                    //  state stack ptr.
10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  op        = URX_BUILD(URX_STO_SP, varLoc);
10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op, topLoc);
10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t loopOp = (int32_t)fRXPat->fCompiledPat->popi();
10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(loopOp) == URX_CTR_LOOP && URX_VAL(loopOp) == topLoc);
10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            loopOp++;     // point LoopOp after the just-inserted STO_SP
10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->push(loopOp, *fStatus);
10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Then the LD_SP after the end of the loop
10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_LD_SP, varLoc);
10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doNGInterval:
10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Finished scanning a non-greedy {lower,upper}? interval.  Generate the code for it.
11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        compileInterval(URX_CTR_INIT_NG, URX_CTR_LOOP_NG);
11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doIntervalError:
11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_BAD_INTERVAL);
11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doLiteralChar:
11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We've just scanned a "normal" character from the pattern,
11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        literalChar(fC.fChar);
11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doEscapedLiteralChar:
11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We've just scanned a backslash-escaped character with no
11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   special meaning.  It represents itself.
11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((fModeFlags & UREGEX_ERROR_ON_UNKNOWN_ESCAPES) != 0 &&
11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ((fC.fChar >= 0x41 && fC.fChar<= 0x5A) ||     // in [A-Z]
11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (fC.fChar >= 0x61 && fC.fChar <= 0x7a))) {   // in [a-z]
11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org               error(U_REGEX_BAD_ESCAPE_SEQUENCE);
11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             }
11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        literalChar(fC.fChar);
11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doDotAny:
11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // scanned a ".",  match any single character.
11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   op;
11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fModeFlags & UREGEX_DOTALL) {
11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_BUILD(URX_DOTANY_ALL, 0);
11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if (fModeFlags & UREGEX_UNIX_LINES) {
11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_BUILD(URX_DOTANY_UNIX, 0);
11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_BUILD(URX_DOTANY, 0);
11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doCaret:
11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = 0;
11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (       (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_CARET;
11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_CARET_M;
11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_CARET;   // Only testing true start of input.
11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_CARET_M_UNIX;
11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doDollar:
11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = 0;
11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (       (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_DOLLAR;
11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) == 0) {
11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_DOLLAR_M;
11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_DOLLAR_D;
11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & UREGEX_UNIX_LINES) != 0) {
11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_DOLLAR_MD;
11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashA:
11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_CARET, 0), *fStatus);
11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashB:
11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            #if  UCONFIG_NO_BREAK_ITERATION==1
11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fModeFlags & UREGEX_UWORD) {
11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_UNSUPPORTED_ERROR);
11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            #endif
11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BACKSLASH_B;
11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(op, 1), *fStatus);
11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashb:
11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            #if  UCONFIG_NO_BREAK_ITERATION==1
11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fModeFlags & UREGEX_UWORD) {
11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_UNSUPPORTED_ERROR);
11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            #endif
12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BACKSLASH_B;
12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashD:
12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 1), *fStatus);
12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashd:
12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 0), *fStatus);
12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashG:
12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_G, 0), *fStatus);
12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashS:
12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(
12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            URX_BUILD(URX_STAT_SETREF_N, URX_ISSPACE_SET), *fStatus);
12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashs:
12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(
12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            URX_BUILD(URX_STATIC_SETREF, URX_ISSPACE_SET), *fStatus);
12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashW:
12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(
12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            URX_BUILD(URX_STAT_SETREF_N, URX_ISWORD_SET), *fStatus);
12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashw:
12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(
12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            URX_BUILD(URX_STATIC_SETREF, URX_ISWORD_SET), *fStatus);
12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashX:
12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_X, 0), *fStatus);
12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashZ:
12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOLLAR, 0), *fStatus);
12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackslashz:
12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_Z, 0), *fStatus);
12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doEscapeError:
12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_BAD_ESCAPE_SEQUENCE);
12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doExit:
12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        returnVal = FALSE;
12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doProperty:
12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *theSet = scanProp();
12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            compileSet(theSet);
12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doNamedChar:
12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 c = scanNamedChar();
12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            literalChar(c);
12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBackRef:
12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // BackReference.  Somewhat unusual in that the front-end can not completely parse
12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                 the regular expression, because the number of digits to be consumed
12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                 depends on the number of capture groups that have been defined.  So
12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                 we have to do it here instead.
12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  numCaptureGroups = fRXPat->fGroupMap->size();
12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  groupNum = 0;
12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32  c        = fC.fChar;
12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (;;) {
12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Loop once per digit, for max allowed number of digits in a back reference.
12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t digit = u_charDigitValue(c);
12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                groupNum = groupNum * 10 + digit;
13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (groupNum >= numCaptureGroups) {
13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c = peekCharLL();
13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (RegexStaticSets::gStaticSets->fRuleDigitsAlias->contains(c) == FALSE) {
13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                nextCharLL();
13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Scan of the back reference in the source regexp is complete.  Now generate
13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  the compiled code for it.
13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Because capture groups can be forward-referenced by back-references,
13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  we fill the operand with the capture group number.  At the end
13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  of compilation, it will be changed to the variable's location.
13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(groupNum > 0);  // Shouldn't happen.  '\0' begins an octal escape sequence,
13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                     //    and shouldn't enter this code path at all.
13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  op;
13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_BUILD(URX_BACKREF_I, groupNum);
13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_BUILD(URX_BACKREF, groupNum);
13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doPossessivePlus:
13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Possessive ++ quantifier.
13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Compiles to
13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       1.   STO_SP
13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       2.      body of stuff being iterated over
13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       3.   STATE_SAVE 5
13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       4.   JMP        2
13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       5.   LD_SP
13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       6.   ...
13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Note:  TODO:  This is pretty inefficient.  A mass of saved state is built up
13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //                then unconditionally discarded.  Perhaps introduce a new opcode.  Ticket 6056
13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the STO_SP
13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   topLoc = blockTopLoc(TRUE);
13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   stoLoc = fRXPat->fDataSize;
13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize++;       // Reserve the data location for storing save stack ptr.
13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   op     = URX_BUILD(URX_STO_SP, stoLoc);
13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op, topLoc);
13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the STATE_SAVE
13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+2);
13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the JMP
13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_JMP, topLoc+1);
13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the LD_SP
13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_LD_SP, stoLoc);
13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doPossessiveStar:
13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Possessive *+ quantifier.
13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Compiles to
13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       1.   STO_SP       loc
13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       2.   STATE_SAVE   5
13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       3.      body of stuff being iterated over
13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       4.   JMP          2
13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       5.   LD_SP        loc
13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       6    ...
13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // TODO:  do something to cut back the state stack each time through the loop.
13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Reserve two slots at the top of the block.
13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   topLoc = blockTopLoc(TRUE);
13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            insertOp(topLoc);
13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // emit   STO_SP     loc
13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   stoLoc = fRXPat->fDataSize;
13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize++;       // Reserve the data location for storing save stack ptr.
13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   op     = URX_BUILD(URX_STO_SP, stoLoc);
13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op, topLoc);
13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the STATE_SAVE   5
13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t L7 = fRXPat->fCompiledPat->size()+1;
13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_STATE_SAVE, L7);
13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op, topLoc+1);
13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Append the JMP operation.
13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_JMP, topLoc+1);
13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the LD_SP       loc
13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_LD_SP, stoLoc);
13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doPossessiveOpt:
14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Possessive  ?+ quantifier.
14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Compiles to
14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     1. STO_SP      loc
14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     2. STATE_SAVE  5
14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     3.    body of optional block
14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     4. LD_SP       loc
14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     5. ...
14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Reserve two slots at the top of the block.
14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   topLoc = blockTopLoc(TRUE);
14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            insertOp(topLoc);
14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the STO_SP
14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   stoLoc = fRXPat->fDataSize;
14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fDataSize++;       // Reserve the data location for storing save stack ptr.
14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   op     = URX_BUILD(URX_STO_SP, stoLoc);
14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op, topLoc);
14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the STATE_SAVE
14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   continueLoc = fRXPat->fCompiledPat->size()+1;
14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_STATE_SAVE, continueLoc);
14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op, topLoc+1);
14246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Emit the LD_SP
14266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_LD_SP, stoLoc);
14276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
14286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
14306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBeginMatchMode:
14336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fNewModeFlags = fModeFlags;
14346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fSetModeFlag  = TRUE;
14356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
14366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doMatchMode:   //  (?i)    and similar
14386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
14396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  bit = 0;
14406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            switch (fC.fChar) {
14416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case 0x69: /* 'i' */   bit = UREGEX_CASE_INSENSITIVE; break;
14426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case 0x64: /* 'd' */   bit = UREGEX_UNIX_LINES;       break;
14436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case 0x6d: /* 'm' */   bit = UREGEX_MULTILINE;        break;
14446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case 0x73: /* 's' */   bit = UREGEX_DOTALL;           break;
14456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case 0x75: /* 'u' */   bit = 0; /* Unicode casing */  break;
14466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case 0x77: /* 'w' */   bit = UREGEX_UWORD;            break;
14476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case 0x78: /* 'x' */   bit = UREGEX_COMMENTS;         break;
14486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case 0x2d: /* '-' */   fSetModeFlag = FALSE;          break;
14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            default:
14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(FALSE);   // Should never happen.  Other chars are filtered out
14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                   // by the scanner.
14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fSetModeFlag) {
14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fNewModeFlags |= bit;
14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fNewModeFlags &= ~bit;
14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetMatchMode:
14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Emit code to match any pending literals, using the not-yet changed match mode.
14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals();
14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We've got a (?i) or similar.  The match mode is being changed, but
14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   the change is not scoped to a parenthesized block.
14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fNewModeFlags < 0);
14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fModeFlags = fNewModeFlags;
14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doMatchModeParen:
14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We've got a (?i: or similar.  Begin a parenthesized block, save old
14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   mode flags so they can be restored at the close of the block.
14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Compile to a
14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - NOP, which later may be replaced by a save-state if the
14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //         parenthesized group gets a * quantifier, followed by
14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //      - NOP, which may later be replaced by a save-state if there
14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //             is an '|' alternation within the parens.
14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fixLiterals(FALSE);
14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // On the Parentheses stack, start a new frame and add the positions
14886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of the two NOPs (a normal non-capturing () frame, except for the
14896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   saving of the original mode flags.)
14906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fModeFlags, *fStatus);
14916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(flags, *fStatus);                            // Frame Marker
14926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus);   // The first NOP
14936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP
14946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Set the current mode flags to the new values.
14966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(fNewModeFlags < 0);
14976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fModeFlags = fNewModeFlags;
14986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
15006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doBadModeFlag:
15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_INVALID_FLAG);
15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSuppressComments:
15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We have just scanned a '(?'.  We now need to prevent the character scanner from
15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // treating a '#' as a to-the-end-of-line comment.
15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   (This Perl compatibility just gets uglier and uglier to do...)
15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fEOLComments = FALSE;
15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetAddAmp:
15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          set->add(chAmp);
15176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
15196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetAddDash:
15216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
15226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
15236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          set->add(chDash);
15246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
15266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     case doSetBackslash_s:
15286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
15296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
15306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         set->addAll(*RegexStaticSets::gStaticSets->fPropSets[URX_ISSPACE_SET]);
15316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         break;
15326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     case doSetBackslash_S:
15356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
15366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
15376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet SSet(*RegexStaticSets::gStaticSets->fPropSets[URX_ISSPACE_SET]);
15386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            SSet.complement();
15396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->addAll(SSet);
15406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
15416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetBackslash_d:
15446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
15456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
15466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // TODO - make a static set, ticket 6058.
15476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_ND_MASK, *fStatus);
15486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
15496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetBackslash_D:
15526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
15536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
15546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet digits;
15556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // TODO - make a static set, ticket 6058.
15566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            digits.applyIntPropertyValue(UCHAR_GENERAL_CATEGORY_MASK, U_GC_ND_MASK, *fStatus);
15576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            digits.complement();
15586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->addAll(digits);
15596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
15606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetBackslash_w:
15636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
15646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
15656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->addAll(*RegexStaticSets::gStaticSets->fPropSets[URX_ISWORD_SET]);
15666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
15676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetBackslash_W:
15706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
15716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *set = (UnicodeSet *)fSetStack.peek();
15726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet SSet(*RegexStaticSets::gStaticSets->fPropSets[URX_ISWORD_SET]);
15736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            SSet.complement();
15746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->addAll(SSet);
15756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
15766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetBegin:
15796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);
15806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fSetStack.push(new UnicodeSet(), *fStatus);
15816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fSetOpStack.push(setStart, *fStatus);
15826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) != 0) {
15836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fSetOpStack.push(setCaseClose, *fStatus);
15846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
15866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetBeginDifference1:
15886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  We have scanned something like [[abc]-[
15896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Set up a new UnicodeSet for the set beginning with the just-scanned '['
15906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Push a Difference operator, which will cause the new set to be subtracted from what
15916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    went before once it is created.
15926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setPushOp(setDifference1);
15936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fSetOpStack.push(setStart, *fStatus);
15946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) != 0) {
15956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fSetOpStack.push(setCaseClose, *fStatus);
15966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
15986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetBeginIntersection1:
16006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  We have scanned something like  [[abc]&[
16016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Need both the '&' operator and the open '[' operator.
16026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setPushOp(setIntersection1);
16036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fSetOpStack.push(setStart, *fStatus);
16046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) != 0) {
16056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fSetOpStack.push(setCaseClose, *fStatus);
16066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
16086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetBeginUnion:
16106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  We have scanned something like  [[abc][
16116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     Need to handle the union operation explicitly [[abc] | [
16126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setPushOp(setUnion);
16136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fSetOpStack.push(setStart, *fStatus);
16146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) != 0) {
16156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fSetOpStack.push(setCaseClose, *fStatus);
16166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
16186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetDifference2:
16206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We have scanned something like [abc--
16216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Consider this to unambiguously be a set difference operator.
16226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setPushOp(setDifference2);
16236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
16246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetEnd:
16266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Have encountered the ']' that closes a set.
16276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    Force the evaluation of any pending operations within this set,
16286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    leave the completed set on the top of the set stack.
16296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setEval(setEnd);
16306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fSetOpStack.peeki()==setStart);
16316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fSetOpStack.popi();
16326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
16336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetFinish:
16356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
16366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Finished a complete set expression, including all nested sets.
16376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   The close bracket has already triggered clearing out pending set operators,
16386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    the operator stack should be empty and the operand stack should have just
16396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    one entry, the result set.
16406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fSetOpStack.empty());
16416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeSet *theSet = (UnicodeSet *)fSetStack.pop();
16426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fSetStack.empty());
16436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        compileSet(theSet);
16446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
16456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetIntersection2:
16486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Have scanned something like [abc&&
16496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setPushOp(setIntersection2);
16506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
16516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetLiteral:
16536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Union the just-scanned literal character into the set being built.
16546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    This operation is the highest precedence set operation, so we can always do
16556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    it immediately, without waiting to see what follows.  It is necessary to perform
16566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    any pending '-' or '&' operation first, because these have the same precedence
16576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    as union-ing in a literal'
16586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
16596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            setEval(setUnion);
16606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
16616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            s->add(fC.fChar);
16626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fLastSetLiteral = fC.fChar;
16636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
16646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetLiteralEscaped:
16676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A back-slash escaped literal character was encountered.
16686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Processing is the same as with setLiteral, above, with the addition of
16696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  the optional check for errors on escaped ASCII letters.
16706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
16716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if ((fModeFlags & UREGEX_ERROR_ON_UNKNOWN_ESCAPES) != 0 &&
16726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ((fC.fChar >= 0x41 && fC.fChar<= 0x5A) ||     // in [A-Z]
16736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 (fC.fChar >= 0x61 && fC.fChar <= 0x7a))) {   // in [a-z]
16746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_REGEX_BAD_ESCAPE_SEQUENCE);
16756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
16766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            setEval(setUnion);
16776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
16786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            s->add(fC.fChar);
16796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fLastSetLiteral = fC.fChar;
16806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
16816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case doSetNamedChar:
16846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Scanning a \N{UNICODE CHARACTER NAME}
16856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Aside from the source of the character, the processing is identical to doSetLiteral,
16866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    above.
16876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
16886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32  c = scanNamedChar();
16896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            setEval(setUnion);
16906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
16916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            s->add(c);
16926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fLastSetLiteral = c;
16936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
16946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetNamedRange:
16976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We have scanned literal-\N{CHAR NAME}.  Add the range to the set.
16986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The left character is already in the set, and is saved in fLastSetLiteral.
16996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The right side needs to be picked up, the scan is at the 'N'.
17006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Lower Limit > Upper limit being an error matches both Java
17016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //        and ICU UnicodeSet behavior.
17026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
17036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32  c = scanNamedChar();
17046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_SUCCESS(*fStatus) && fLastSetLiteral > c) {
17056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_REGEX_INVALID_RANGE);
17066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
17076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
17086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            s->add(fLastSetLiteral, c);
17096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fLastSetLiteral = c;
17106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
17116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case  doSetNegate:
17156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Scanned a '^' at the start of a set.
17166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Push the negation operator onto the set op stack.
17176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // A twist for case-insensitive matching:
17186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   the case closure operation must happen _before_ negation.
17196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   But the case closure operation will already be on the stack if it's required.
17206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   This requires checking for case closure, and swapping the stack order
17216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    if it is present.
17226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
17236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  tosOp = fSetOpStack.peeki();
17246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (tosOp == setCaseClose) {
17256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fSetOpStack.popi();
17266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fSetOpStack.push(setNegation, *fStatus);
17276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fSetOpStack.push(setCaseClose, *fStatus);
17286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
17296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fSetOpStack.push(setNegation, *fStatus);
17306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
17316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
17336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetNoCloseError:
17356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_MISSING_CLOSE_BRACKET);
17366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
17376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetOpError:
17396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_RULE_SYNTAX);   //  -- or && at the end of a set.  Illegal.
17406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
17416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetPosixProp:
17436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
17446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *s = scanPosixProp();
17456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (s != NULL) {
17466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeSet *tos = (UnicodeSet *)fSetStack.peek();
17476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tos->addAll(*s);
17486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                delete s;
17496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }  // else error.  scanProp() reported the error status already.
17506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
17526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetProp:
17546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Scanned a \p \P within [brackets].
17556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
17566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeSet *s = scanProp();
17576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (s != NULL) {
17586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeSet *tos = (UnicodeSet *)fSetStack.peek();
17596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tos->addAll(*s);
17606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                delete s;
17616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }  // else error.  scanProp() reported the error status already.
17626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
17646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case doSetRange:
17676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We have scanned literal-literal.  Add the range to the set.
17686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The left character is already in the set, and is saved in fLastSetLiteral.
17696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The right side is the current character.
17706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Lower Limit > Upper limit being an error matches both Java
17716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //        and ICU UnicodeSet behavior.
17726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
17736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fLastSetLiteral > fC.fChar) {
17746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            error(U_REGEX_INVALID_RANGE);
17756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeSet *s = (UnicodeSet *)fSetStack.peek();
17776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s->add(fLastSetLiteral, fC.fChar);
17786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
17796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    default:
17826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
17836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_INTERNAL_ERROR);
17846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
17856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
17886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        returnVal = FALSE;
17896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return returnVal;
17926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
17976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
17986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   literalChar           We've encountered a literal character from the pattern,
17996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                             or an escape sequence that reduces to a character.
18006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         Add it to the string containing all literal chars/strings from
18016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                             the pattern.
18026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
18046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexCompile::literalChar(UChar32 c)  {
18056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLiteralChars.append(c);
18066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
18106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    fixLiterals           When compiling something that can follow a literal
18126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          string in a pattern, emit the code to match the
18136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          accumulated literal string.
18146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          Optionally, split the last char of the string off into
18166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          a single "ONE_CHAR" operation, so that quantifiers can
18176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          apply to that char alone.  Example:   abc*
18186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          The * must apply to the 'c' only.
18196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
18216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid    RegexCompile::fixLiterals(UBool split) {
18226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  op = 0;                       // An op from/for the compiled pattern.
18236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // If no literal characters have been scanned but not yet had code generated
18256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   for them, nothing needs to be done.
18266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fLiteralChars.length() == 0) {
18276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
18286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.length(), -1);
18316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint);
18326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Split:  We need to  ensure that the last item in the compiled pattern
18346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     refers only to the last literal scanned in the pattern, so that
18356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     quantifiers (*, +, etc.) affect only it, and not a longer string.
18366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     Split before case folding for case insensitive matches.
18376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (split) {
18396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLiteralChars.truncate(indexOfLastCodePoint);
18406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);   // Recursive call, emit code to match the first part of the string.
18416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              //  Note that the truncated literal string may be empty, in which case
18426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              //  nothing will be emitted.
18436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        literalChar(lastCodePoint);  // Re-add the last code point as if it were a new literal.
18456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fixLiterals(FALSE);          // Second recursive call, code for the final code point.
18466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
18476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // If we are doing case-insensitive matching, case fold the string.  This may expand
18506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   the string, e.g. the German sharp-s turns into "ss"
18516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
18526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLiteralChars.foldCase();
18536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.length(), -1);
18546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint);
18556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (indexOfLastCodePoint == 0) {
18586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Single character, emit a URX_ONECHAR op to match it.
18596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((fModeFlags & UREGEX_CASE_INSENSITIVE) &&
18606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) {
18616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_ONECHAR_I, lastCodePoint);
18626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
18636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_ONECHAR, lastCodePoint);
18646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
18656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(op, *fStatus);
18666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
18676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Two or more chars, emit a URX_STRING to match them.
18686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
18696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_STRING_I, fRXPat->fLiteralText.length());
18706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
18716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // TODO here:  add optimization to split case sensitive strings of length two
18726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //             into two single char ops, for efficiency.
18736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_STRING, fRXPat->fLiteralText.length());
18746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
18756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(op, *fStatus);
18766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length());
18776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(op, *fStatus);
18786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Add this string into the accumulated strings of the compiled pattern.
18806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fLiteralText.append(fLiteralChars);
18816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLiteralChars.remove();
18846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
18926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   insertOp()             Insert a slot for a new opcode into the already
18946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          compiled pattern code.
18956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          Fill the slot with a NOP.  Our caller will replace it
18976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          with what they really wanted.
18986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
18996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
19006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid   RegexCompile::insertOp(int32_t where) {
19016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector64 *code = fRXPat->fCompiledPat;
19026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(where>0 && where < code->size());
19036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  nop = URX_BUILD(URX_NOP, 0);
19056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    code->insertElementAt(nop, where, *fStatus);
19066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Walk through the pattern, looking for any ops with targets that
19086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  were moved down by the insert.  Fix them.
19096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t loc;
19106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc=0; loc<code->size(); loc++) {
19116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t op = (int32_t)code->elementAti(loc);
19126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t opType = URX_TYPE(op);
19136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t opValue = URX_VAL(op);
19146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((opType == URX_JMP         ||
19156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            opType == URX_JMPX         ||
19166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            opType == URX_STATE_SAVE   ||
19176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            opType == URX_CTR_LOOP     ||
19186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            opType == URX_CTR_LOOP_NG  ||
19196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            opType == URX_JMP_SAV      ||
19206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            opType == URX_JMP_SAV_X    ||
19216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            opType == URX_RELOC_OPRND)    && opValue > where) {
19226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Target location for this opcode is after the insertion point and
19236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   needs to be incremented to adjust for the insertion.
19246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            opValue++;
19256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(opType, opValue);
19266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            code->setElementAt(op, loc);
19276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
19286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Now fix up the parentheses stack.  All positive values in it are locations in
19316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  the compiled pattern.   (Negative values are frame boundaries, and don't need fixing.)
19326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc=0; loc<fParenStack.size(); loc++) {
19336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t x = fParenStack.elementAti(loc);
19346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(x < code->size());
19356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (x>where) {
19366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            x++;
19376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParenStack.setElementAt(x, loc);
19386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
19396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatchCloseParen > where) {
19426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fMatchCloseParen++;
19436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fMatchOpenParen > where) {
19456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fMatchOpenParen++;
19466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
19526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
19536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   blockTopLoc()          Find or create a location in the compiled pattern
19546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          at the start of the operation or block that has
19556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          just been compiled.  Needed when a quantifier (* or
19566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          whatever) appears, and we need to add an operation
19576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          at the start of the thing being quantified.
19586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
19596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          (Parenthesized Blocks) have a slot with a NOP that
19606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          is reserved for this purpose.  .* or similar don't
19616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          and a slot needs to be added.
19626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
19636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//       parameter reserveLoc   :  TRUE -  ensure that there is space to add an opcode
19646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                                         at the returned location.
19656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                                 FALSE - just return the address,
19666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                                         do not reserve a location there.
19676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
19686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
19696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t   RegexCompile::blockTopLoc(UBool reserveLoc) {
19706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   theLoc;
19716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fixLiterals(TRUE);  // Emit code for any pending literals.
19726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        //   If last item was a string, emit separate op for the its last char.
19736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fRXPat->fCompiledPat->size() == fMatchCloseParen)
19746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
19756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The item just processed is a parenthesized block.
19766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        theLoc = fMatchOpenParen;   // A slot is already reserved for us.
19776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(theLoc > 0);
19786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(URX_TYPE(((uint32_t)fRXPat->fCompiledPat->elementAti(theLoc))) == URX_NOP);
19796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else {
19816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Item just compiled is a single thing, a ".", or a single char, a string or a set reference.
19826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // No slot for STATE_SAVE was pre-reserved in the compiled code.
19836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We need to make space now.
19846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        theLoc = fRXPat->fCompiledPat->size()-1;
19856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t opAtTheLoc = (int32_t)fRXPat->fCompiledPat->elementAti(theLoc);
19866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (URX_TYPE(opAtTheLoc) == URX_STRING_LEN) {
19876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Strings take two opcode, we want the position of the first one.
19886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // We can have a string at this point if a single character case-folded to two.
19896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            theLoc--;
19906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
19916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (reserveLoc) {
19926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  nop = URX_BUILD(URX_NOP, 0);
19936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->insertElementAt(nop, theLoc, *fStatus);
19946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
19956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return theLoc;
19976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
20026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//    handleCloseParen      When compiling a close paren, we need to go back
20046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          and fix up any JMP or SAVE operations within the
20056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          parenthesized block that need to target the end
20066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          of the block.  The locations of these are kept on
20076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          the paretheses stack.
20086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          This function is called both when encountering a
20106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                          real ) and at the end of the pattern.
20116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
20126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
20136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid  RegexCompile::handleCloseParen() {
20146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   patIdx;
20156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   patOp;
20166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fParenStack.size() <= 0) {
20176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_MISMATCHED_PAREN);
20186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
20196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Emit code for any pending literals.
20226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fixLiterals(FALSE);
20236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Fixup any operations within the just-closed parenthesized group
20256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    that need to reference the end of the (block).
20266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    (The first one popped from the stack is an unused slot for
20276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     alternation (OR) state save, but applying the fixup to it does no harm.)
20286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
20296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        patIdx = fParenStack.popi();
20306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (patIdx < 0) {
20316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // value < 0 flags the start of the frame on the paren stack.
20326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
20336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(patIdx>0 && patIdx <= fRXPat->fCompiledPat->size());
20356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        patOp = (int32_t)fRXPat->fCompiledPat->elementAti(patIdx);
20366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(URX_VAL(patOp) == 0);          // Branch target for JMP should not be set.
20376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        patOp |= fRXPat->fCompiledPat->size();  // Set it now.
20386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->setElementAt(patOp, patIdx);
20396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fMatchOpenParen     = patIdx;
20406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  At the close of any parenthesized block, restore the match mode flags  to
20436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  the value they had at the open paren.  Saved value is
20446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  at the top of the paren stack.
20456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fModeFlags = fParenStack.popi();
20466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(fModeFlags < 0);
20476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // DO any additional fixups, depending on the specific kind of
20496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // parentesized grouping this is
20506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch (patIdx) {
20526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case plain:
20536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case flags:
20546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // No additional fixups required.
20556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   (Grouping-only parentheses)
20566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
20576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case capturing:
20586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Capturing Parentheses.
20596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Insert a End Capture op into the pattern.
20606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   The frame offset of the variables for this cg is obtained from the
20616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //       start capture op and put it into the end-capture op.
20626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
20636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   captureOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
20646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(captureOp) == URX_START_CAPTURE);
20656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   frameVarLocation = URX_VAL(captureOp);
20676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   endCaptureOp = URX_BUILD(URX_END_CAPTURE, frameVarLocation);
20686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(endCaptureOp, *fStatus);
20696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
20716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case atomic:
20726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Atomic Parenthesis.
20736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Insert a LD_SP operation to restore the state stack to the position
20746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   it was when the atomic parens were entered.
20756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
20766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   stoOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
20776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(stoOp) == URX_STO_SP);
20786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   stoLoc = URX_VAL(stoOp);
20796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t   ldOp   = URX_BUILD(URX_LD_SP, stoLoc);
20806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(ldOp, *fStatus);
20816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
20836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case lookAhead:
20856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
20866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
20876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
20886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dataLoc  = URX_VAL(startOp);
20896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op       = URX_BUILD(URX_LA_END, dataLoc);
20906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
20916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
20936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case negLookAhead:
20956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
20966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // See comment at doOpenLookAheadNeg
20976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-1);
20986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
20996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dataLoc  = URX_VAL(startOp);
21006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op       = URX_BUILD(URX_LA_END, dataLoc);
21016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
21026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op               = URX_BUILD(URX_BACKTRACK, 0);
21036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
21046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op               = URX_BUILD(URX_LA_END, dataLoc);
21056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
21066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Patch the URX_SAVE near the top of the block.
21086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The destination of the SAVE is the final LA_END that was just added.
21096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t saveOp   = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen);
21106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE);
21116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dest     = fRXPat->fCompiledPat->size()-1;
21126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            saveOp           = URX_BUILD(URX_STATE_SAVE, dest);
21136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(saveOp, fMatchOpenParen);
21146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
21156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
21166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case lookBehind:
21186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
21196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // See comment at doOpenLookBehind.
21206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Append the URX_LB_END and URX_LA_END to the compiled pattern.
21226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-4);
21236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
21246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dataLoc  = URX_VAL(startOp);
21256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op       = URX_BUILD(URX_LB_END, dataLoc);
21266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
21276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    op       = URX_BUILD(URX_LA_END, dataLoc);
21286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
21296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Determine the min and max bounds for the length of the
21316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  string that the pattern can match.
21326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  An unbounded upper limit is an error.
21336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t patEnd   = fRXPat->fCompiledPat->size() - 1;
21346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t minML    = minMatchLength(fMatchOpenParen, patEnd);
21356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t maxML    = maxMatchLength(fMatchOpenParen, patEnd);
21366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (maxML == INT32_MAX) {
21376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_REGEX_LOOK_BEHIND_LIMIT);
21386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
21396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
21406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(minML <= maxML);
21416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Insert the min and max match len bounds into the URX_LB_CONT op that
21436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  appears at the top of the look-behind block, at location fMatchOpenParen+1
21446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(minML,  fMatchOpenParen-2);
21456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(maxML,  fMatchOpenParen-1);
21466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
21486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
21496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case lookBehindN:
21536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
21546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // See comment at doOpenLookBehindNeg.
21556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Append the URX_LBN_END to the compiled pattern.
21576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t  startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOpenParen-5);
21586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
21596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t dataLoc  = URX_VAL(startOp);
21606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t op       = URX_BUILD(URX_LBN_END, dataLoc);
21616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(op, *fStatus);
21626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Determine the min and max bounds for the length of the
21646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  string that the pattern can match.
21656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  An unbounded upper limit is an error.
21666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t patEnd   = fRXPat->fCompiledPat->size() - 1;
21676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t minML    = minMatchLength(fMatchOpenParen, patEnd);
21686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t maxML    = maxMatchLength(fMatchOpenParen, patEnd);
21696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (maxML == INT32_MAX) {
21706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                error(U_REGEX_LOOK_BEHIND_LIMIT);
21716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
21726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
21736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(minML <= maxML);
21746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Insert the min and max match len bounds into the URX_LB_CONT op that
21766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  appears at the top of the look-behind block, at location fMatchOpenParen+1
21776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(minML,  fMatchOpenParen-3);
21786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(maxML,  fMatchOpenParen-2);
21796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Insert the pattern location to continue at after a successful match
21816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  as the last operand of the URX_LBN_CONT
21826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            op = URX_BUILD(URX_RELOC_OPRND, fRXPat->fCompiledPat->size());
21836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op,  fMatchOpenParen-1);
21846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
21856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
21866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    default:
21906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(FALSE);
21916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
21926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // remember the next location in the compiled pattern.
21946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The compilation of Quantifiers will look at this to see whether its looping
21956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   over a parenthesized block or a single item
21966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fMatchCloseParen = fRXPat->fCompiledPat->size();
21976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
21986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
21996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
22026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   compileSet       Compile the pattern operations for a reference to a
22046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                    UnicodeSet.
22056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
22076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid        RegexCompile::compileSet(UnicodeSet *theSet)
22086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
22096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (theSet == NULL) {
22106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
22116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Remove any strings from the set.
22136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  There shoudn't be any, but just in case.
22146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     (Case Closure can add them; if we had a simple case closure avaialble that
22156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //      ignored strings, that would be better.)
22166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    theSet->removeAllStrings();
22176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t  setSize = theSet->size();
22186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch (setSize) {
22206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 0:
22216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
22226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Set of no elements.   Always fails to match.
22236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKTRACK, 0), *fStatus);
22246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete theSet;
22256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
22276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 1:
22296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
22306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The set contains only a single code point.  Put it into
22316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   the compiled pattern as a single char operation rather
22326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   than a set, and discard the set itself.
22336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            literalChar(theSet->charAt(0));
22346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            delete theSet;
22356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        break;
22376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    default:
22396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
22406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  The set contains two or more chars.  (the normal case)
22416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  Put it into the compiled pattern as a set.
22426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t setNumber = fRXPat->fSets->size();
22436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fSets->addElement(theSet, *fStatus);
22446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t setOp = URX_BUILD(URX_SETREF, setNumber);
22456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(setOp, *fStatus);
22466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
22476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
22496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
22526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   compileInterval    Generate the code for a {min, max} style interval quantifier.
22546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                      Except for the specific opcodes used, the code is the same
22556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                      for all three types (greedy, non-greedy, possessive) of
22566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                      intervals.  The opcodes are supplied as parameters.
22576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                      (There are two sets of opcodes - greedy & possessive use the
22586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                      same ones, while non-greedy has it's own.)
22596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                      The code for interval loops has this form:
22616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         0  CTR_INIT   counter loc (in stack frame)
22626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         1             5  patt address of CTR_LOOP at bottom of block
22636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         2             min count
22646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         3             max count   (-1 for unbounded)
22656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         4  ...        block to be iterated over
22666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                         5  CTR_LOOP
22676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
22686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                       In
22696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
22706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid        RegexCompile::compileInterval(int32_t InitOp,  int32_t LoopOp)
22716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
22726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The CTR_INIT op at the top of the block with the {n,m} quantifier takes
22736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   four slots in the compiled code.  Reserve them.
22746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   topOfBlock = blockTopLoc(TRUE);
22756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    insertOp(topOfBlock);
22766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    insertOp(topOfBlock);
22776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    insertOp(topOfBlock);
22786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The operands for the CTR_INIT opcode include the index in the matcher data
22806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   of the counter.  Allocate it now. There are two data items
22816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //        counterLoc   -->  Loop counter
22826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //               +1    -->  Input index (for breaking non-progressing loops)
22836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //                          (Only present if unbounded upper limit on loop)
22846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   counterLoc = fRXPat->fFrameSize;
22856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fFrameSize++;
22866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fIntervalUpper < 0) {
22876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fFrameSize++;
22886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
22896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   op = URX_BUILD(InitOp, counterLoc);
22916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fCompiledPat->setElementAt(op, topOfBlock);
22926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
22936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The second operand of CTR_INIT is the location following the end of the loop.
22946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Must put in as a URX_RELOC_OPRND so that the value will be adjusted if the
22956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   compilation of something later on causes the code to grow and the target
22966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   position to move.
22976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t loopEnd = fRXPat->fCompiledPat->size();
22986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    op = URX_BUILD(URX_RELOC_OPRND, loopEnd);
22996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fCompiledPat->setElementAt(op, topOfBlock+1);
23006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Followed by the min and max counts.
23026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fCompiledPat->setElementAt(fIntervalLow, topOfBlock+2);
23036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fCompiledPat->setElementAt(fIntervalUpper, topOfBlock+3);
23046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Apend the CTR_LOOP op.  The operand is the location of the CTR_INIT op.
23066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Goes at end of the block being looped over, so just append to the code so far.
23076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    op = URX_BUILD(LoopOp, topOfBlock);
23086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fCompiledPat->addElement(op, *fStatus);
23096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if ((fIntervalLow & 0xff000000) != 0 ||
23116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (fIntervalUpper > 0 && (fIntervalUpper & 0xff000000) != 0)) {
23126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            error(U_REGEX_NUMBER_TOO_BIG);
23136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
23146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fIntervalLow > fIntervalUpper && fIntervalUpper != -1) {
23166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_MAX_LT_MIN);
23176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexCompile::compileInlineInterval() {
23236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fIntervalUpper > 10 || fIntervalUpper < fIntervalLow) {
23246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Too big to inline.  Fail, which will cause looping code to be generated.
23256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   (Upper < Lower picks up unbounded upper and errors, both.)
23266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
23276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   topOfBlock = blockTopLoc(FALSE);
23306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fIntervalUpper == 0) {
23316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Pathological case.  Attempt no matches, as if the block doesn't exist.
23326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->setSize(topOfBlock);
23336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return TRUE;
23346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (topOfBlock != fRXPat->fCompiledPat->size()-1 && fIntervalUpper != 1) {
23376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The thing being repeated is not a single op, but some
23386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   more complex block.  Do it as a loop, not inlines.
23396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Note that things "repeated" a max of once are handled as inline, because
23406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //     the one copy of the code already generated is just fine.
23416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
23426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Pick up the opcode that is to be repeated
23456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
23466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(topOfBlock);
23476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Compute the pattern location where the inline sequence
23496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   will end, and set up the state save op that will be needed.
23506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
23516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t endOfSequenceLoc = fRXPat->fCompiledPat->size()-1
23526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                + fIntervalUpper + (fIntervalUpper-fIntervalLow);
23536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t saveOp = URX_BUILD(URX_STATE_SAVE, endOfSequenceLoc);
23546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fIntervalLow == 0) {
23556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        insertOp(topOfBlock);
23566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->setElementAt(saveOp, topOfBlock);
23576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Loop, emitting the op for the thing being repeated each time.
23626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    Loop starts at 1 because one instance of the op already exists in the pattern,
23636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    it was put there when it was originally encountered.
23646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i;
23656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=1; i<fIntervalUpper; i++ ) {
23666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (i == fIntervalLow) {
23676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(saveOp, *fStatus);
23686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
23696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (i > fIntervalLow) {
23706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->addElement(saveOp, *fStatus);
23716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
23726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fCompiledPat->addElement(op, *fStatus);
23736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
23756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
23766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
23806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   matchStartType    Determine how a match can start.
23826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     Used to optimize find() operations.
23836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     Operation is very similar to minMatchLength().  Walk the compiled
23856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     pattern, keeping an on-going minimum-match-length.  For any
23866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     op where the min match coming in is zero, add that op's possible
23876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     starting matches to the possible starts for the overall pattern.
23886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
23896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
23906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid   RegexCompile::matchStartType() {
23916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
23926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
23936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
23946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
23966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    loc;                    // Location in the pattern of the current op being processed.
23976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    op;                     // The op being processed
23986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    opType;                 // The opcode type of the op
23996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    currentLen = 0;         // Minimum length of a match to this point (loc) in the pattern
24006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    numInitialStrings = 0;  // Number of strings encountered that could match at start.
24016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool      atStart = TRUE;         // True if no part of the pattern yet encountered
24036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       //   could have advanced the position in a match.
24046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       //   (Maximum match length so far == 0)
24056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // forwardedLength is a vector holding minimum-match-length values that
24076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   are propagated forward in the pattern by JMP or STATE_SAVE operations.
24086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   It must be one longer than the pattern being checked because some  ops
24096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   will jmp to a end-of-block+1 location from within a block, and we must
24106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   count those when checking the block.
24116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t end = fRXPat->fCompiledPat->size();
24126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector32  forwardedLength(end+1, *fStatus);
24136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    forwardedLength.setSize(end+1);
24146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc=3; loc<end; loc++) {
24156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        forwardedLength.setElementAt(INT32_MAX, loc);
24166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
24176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc = 3; loc<end; loc++) {
24196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
24206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        opType = URX_TYPE(op);
24216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The loop is advancing linearly through the pattern.
24236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // If the op we are now at was the destination of a branch in the pattern,
24246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // and that path has a shorter minimum length than the current accumulated value,
24256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // replace the current accumulated value.
24266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (forwardedLength.elementAti(loc) < currentLen) {
24276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = forwardedLength.elementAti(loc);
24286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);
24296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
24306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        switch (opType) {
24326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Ops that don't change the total length matched
24336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RESERVED_OP:
24346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END:
24356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_FAIL:
24366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_LEN:
24376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_NOP:
24386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_START_CAPTURE:
24396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END_CAPTURE:
24406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_B:
24416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_BU:
24426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_G:
24436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_Z:
24446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR:
24456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_M:
24466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_D:
24476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_MD:
24486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RELOC_OPRND:
24496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_INP_LOC:
24506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
24516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF_I:
24526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_SP:          // Setup for atomic or possessive blocks.  Doesn't change what can match.
24546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LD_SP:
24556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
24566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET:
24586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (atStart) {
24596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fStartType = START_START;
24606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
24616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
24626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M:
24646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M_UNIX:
24656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (atStart) {
24666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fStartType = START_LINE;
24676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
24686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
24696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR:
24716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (currentLen == 0) {
24726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // This character could appear at the start of a match.
24736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Add it to the set of possible starting characters.
24746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->add(URX_VAL(op));
24756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numInitialStrings += 2;
24766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
24776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen++;
24786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
24796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
24806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_SETREF:
24836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (currentLen == 0) {
24846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  sn = URX_VAL(op);
24856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(sn > 0 && sn < fRXPat->fSets->size());
24866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UnicodeSet *s = (UnicodeSet *)fRXPat->fSets->elementAt(sn);
24876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->addAll(*s);
24886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numInitialStrings += 2;
24896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
24906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen++;
24916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
24926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
24936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
24946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_SR_I:
24956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // [Set]*, like a SETREF, above, in what it can match,
24966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  but may not match at all, so currentLen is not incremented.
24976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (currentLen == 0) {
24986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  sn = URX_VAL(op);
24996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(sn > 0 && sn < fRXPat->fSets->size());
25006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UnicodeSet *s = (UnicodeSet *)fRXPat->fSets->elementAt(sn);
25016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->addAll(*s);
25026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numInitialStrings += 2;
25036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
25046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
25056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_DOT_I:
25086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (currentLen == 0) {
25096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // .* at the start of a pattern.
25106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    Any character can begin the match.
25116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->clear();
25126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->complement();
25136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numInitialStrings += 2;
25146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
25156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
25166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATIC_SETREF:
25206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (currentLen == 0) {
25216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  sn = URX_VAL(op);
25226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(sn>0 && sn<URX_LAST_SET);
25236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UnicodeSet *s = fRXPat->fStaticSets[sn];
25246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->addAll(*s);
25256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numInitialStrings += 2;
25266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
25276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen++;
25286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
25296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STAT_SETREF_N:
25346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (currentLen == 0) {
25356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  sn = URX_VAL(op);
25366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UnicodeSet *s = fRXPat->fStaticSets[sn];
25376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeSet sc(*s);
25386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                sc.complement();
25396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->addAll(sc);
25406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numInitialStrings += 2;
25416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
25426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen++;
25436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
25446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_D:
25496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Digit Char
25506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             if (currentLen == 0) {
25516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 UnicodeSet s;
25526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 s.applyIntPropertyValue(UCHAR_GENERAL_CATEGORY_MASK, U_GC_ND_MASK, *fStatus);
25536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 if (URX_VAL(op) != 0) {
25546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     s.complement();
25556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 }
25566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 fRXPat->fInitialChars->addAll(s);
25576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 numInitialStrings += 2;
25586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
25596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen++;
25606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
25616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR_I:
25656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Case Insensitive Single Character.
25666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (currentLen == 0) {
25676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32  c = URX_VAL(op);
25686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (u_hasBinaryProperty(c, UCHAR_CASE_SENSITIVE)) {
25696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Disable optimizations on first char of match.
25716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // TODO: Compute the set of chars that case fold to this char, or to
25726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //       a string that begins with this char.
25736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //       For simple case folding, this code worked:
25746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   UnicodeSet s(c, c);
25756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   s.closeOver(USET_CASE_INSENSITIVE);
25766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   fRXPat->fInitialChars->addAll(s);
25776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fInitialChars->clear();
25796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fInitialChars->complement();
25806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
25816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Char has no case variants.  Just add it as-is to the
25826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   set of possible starting chars.
25836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fInitialChars->add(c);
25846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
25856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numInitialStrings += 2;
25866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
25876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen++;
25886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
25896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
25906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
25926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_X:   // Grapheme Cluster.  Minimum is 1, max unbounded.
25936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_ALL:    // . matches one or two.
25946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY:
25956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_UNIX:
25966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (currentLen == 0) {
25976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // These constructs are all bad news when they appear at the start
25986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   of a match.  Any character can begin the match.
25996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->clear();
26006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fInitialChars->complement();
26016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                numInitialStrings += 2;
26026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
26036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen++;
26046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
26056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
26066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMPX:
26096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            loc++;             // Except for extra operand on URX_JMPX, same as URX_JMP.
26106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP:
26116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
26126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  jmpDest = URX_VAL(op);
26136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (jmpDest < loc) {
26146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Loop of some kind.  Can safely ignore, the worst that will happen
26156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  is that we understate the true minimum length
26166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    currentLen = forwardedLength.elementAti(loc+1);
26176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
26196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Forward jump.  Propagate the current min length to the target loc of the jump.
26206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(jmpDest <= end+1);
26216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (forwardedLength.elementAti(jmpDest) > currentLen) {
26226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        forwardedLength.setElementAt(currentLen, jmpDest);
26236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
26246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
26256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
26266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
26276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
26286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV:
26306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV_X:
26316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Combo of state save to the next loc, + jmp backwards.
26326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   Net effect on min. length computation is nothing.
26336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
26346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
26356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKTRACK:
26376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Fails are kind of like a branch, except that the min length was
26386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   propagated already, by the state save.
26396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = forwardedLength.elementAti(loc+1);
26406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
26416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
26426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATE_SAVE:
26456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
26466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // State Save.  For forward jumps, propagate the current minimum
26476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   length to the destination of the state save.
26486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  jmpDest = URX_VAL(op);
26496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (jmpDest > loc) {
26506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (currentLen < forwardedLength.elementAti(jmpDest)) {
26516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        forwardedLength.setElementAt(currentLen, jmpDest);
26526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
26536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
26546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
26556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
26566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
26576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING:
26626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
26636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                loc++;
26646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
26656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stringLen   = URX_VAL(stringLenOp);
26666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(stringLenOp) == URX_STRING_LEN);
26676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stringLenOp >= 2);
26686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (currentLen == 0) {
26696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Add the starting character of this string to the set of possible starting
26706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   characters for this pattern.
26716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t stringStartIdx = URX_VAL(op);
26726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32  c = fRXPat->fLiteralText.char32At(stringStartIdx);
26736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fInitialChars->add(c);
26746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Remember this string.  After the entire pattern has been checked,
26766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  if nothing else is identified that can start a match, we'll use it.
26776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    numInitialStrings++;
26786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fInitialStringIdx = stringStartIdx;
26796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fInitialStringLen = stringLen;
26806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
26816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen += stringLen;
26836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                atStart = FALSE;
26846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
26856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
26866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
26876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_I:
26886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
26896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Case-insensitive string.  Unlike exact-match strings, we won't
26906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   attempt a string search for possible match positions.  But we
26916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   do update the set of possible starting characters.
26926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                loc++;
26936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
26946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stringLen   = URX_VAL(stringLenOp);
26956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(URX_TYPE(stringLenOp) == URX_STRING_LEN);
26966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(stringLenOp >= 2);
26976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (currentLen == 0) {
26986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Add the starting character of this string to the set of possible starting
26996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   characters for this pattern.
27006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t stringStartIdx = URX_VAL(op);
27016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar32  c = fRXPat->fLiteralText.char32At(stringStartIdx);
27026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UnicodeSet s(c, c);
27036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // TODO:  compute correct set of starting chars for full case folding.
27056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //        For the moment, say any char can start.
27066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // s.closeOver(USET_CASE_INSENSITIVE);
27076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    s.clear();
27086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    s.complement();
27096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fRXPat->fInitialChars->addAll(s);
27116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    numInitialStrings += 2;  // Matching on an initial string not possible.
27126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
27136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen += stringLen;
27146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                atStart = FALSE;
27156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
27166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
27176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT:
27196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT_NG:
27206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
27216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Loop Init Ops.  These don't change the min length, but they are 4 word ops
27226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   so location must be updated accordingly.
27236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   If the min loop count == 0,
27246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      treat the op like a forward branch: propagate the current minimum
27256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      length to the end-of-loop location.  loc only advances past the operands.
27266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   If the min count is > 0,
27276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      continue normal processing of the body of the loop.
27286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopEndLoc   = (int32_t)fRXPat->fCompiledPat->elementAti(loc+1);
27296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        loopEndLoc   = URX_VAL(loopEndLoc);
27306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minLoopCount = (int32_t)fRXPat->fCompiledPat->elementAti(loc+2);
27316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (minLoopCount == 0) {
27326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Min Loop Count of 0, treat like a forward branch and
27336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   move the current minimum length up to the target
27346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //   (end of loop) location.
27356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(loopEndLoc <= end+1);
27366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (forwardedLength.elementAti(loopEndLoc) > currentLen) {
27376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        forwardedLength.setElementAt(currentLen, loopEndLoc);
27386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
27396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
27406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                loc+=3;  // Skips over operands of CTR_INIT
27416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
27426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
27436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
27446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP:
27476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP_NG:
27486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Loop ops.
27496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  The jump is conditional, backwards only.
27506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
27516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
27526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_C:
27546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // More loop ops.  These state-save to themselves, and
27556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   don't change the minimum match length.
27566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            atStart = FALSE;
27576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
27586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_START:
27616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_START:
27626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
27636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Look-around.  Scan forward until the matching look-ahead end,
27646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   without processing the look-around block.  This is overly pessimistic.
27656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
27666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Keep track of the nesting depth of look-around blocks.  Boilerplate code for
27676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   lookahead contains two LA_END instructions, so count goes up by two
27686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   for each LA_START.
27696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  depth = (opType == URX_LA_START? 2: 1);
27706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
27716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    loc++;
27726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
27736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_LA_START) {
27746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth+=2;
27756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
27766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_LB_START) {
27776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth++;
27786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
27796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_LA_END || URX_TYPE(op)==URX_LBN_END) {
27806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth--;
27816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (depth == 0) {
27826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
27836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
27846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
27856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_STATE_SAVE) {
27866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // Need this because neg lookahead blocks will FAIL to outside
27876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        //   of the block.
27886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        int32_t  jmpDest = URX_VAL(op);
27896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (jmpDest > loc) {
27906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if (currentLen < forwardedLength.elementAti(jmpDest)) {
27916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                forwardedLength.setElementAt(currentLen, jmpDest);
27926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
27936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
27946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
27956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(loc <= end);
27966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
27976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
27986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
27996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_END:
28016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_CONT:
28026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_END:
28036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_CONT:
28046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_END:
28056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(FALSE);     // Shouldn't get here.  These ops should be
28066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                 //  consumed by the scan in URX_LA_START and LB_START
28076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
28096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        default:
28116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(FALSE);
28126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
28136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
28156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // We have finished walking through the ops.  Check whether some forward jump
28186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   propagated a shorter length to location end+1.
28196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (forwardedLength.elementAti(end+1) < currentLen) {
28206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        currentLen = forwardedLength.elementAti(end+1);
28216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
28226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fInitialChars8->init(fRXPat->fInitialChars);
28256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Sort out what we should check for when looking for candidate match start positions.
28286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // In order of preference,
28296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     1.   Start of input text buffer.
28306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     2.   A literal string.
28316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     3.   Start of line in multi-line mode.
28326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     4.   A single literal character.
28336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     5.   A character from a set of characters.
28346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
28356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fRXPat->fStartType == START_START) {
28366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match only at the start of an input text string.
28376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    start type is already set.  We're done.
28386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (numInitialStrings == 1 && fRXPat->fMinMatchLen > 0) {
28396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match beginning only with a literal string.
28406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32  c = fRXPat->fLiteralText.char32At(fRXPat->fInitialStringIdx);
28416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fRXPat->fInitialChars->contains(c));
28426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fStartType   = START_STRING;
28436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fInitialChar = c;
28446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (fRXPat->fStartType == START_LINE) {
28456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Match at start of line in Multi-Line mode.
28466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Nothing to do here; everything is already set.
28476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (fRXPat->fMinMatchLen == 0) {
28486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Zero length match possible.  We could start anywhere.
28496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fStartType = START_NO_INFO;
28506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (fRXPat->fInitialChars->size() == 1) {
28516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // All matches begin with the same char.
28526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fStartType   = START_CHAR;
28536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fInitialChar = fRXPat->fInitialChars->charAt(0);
28546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fRXPat->fInitialChar != (UChar32)-1);
28556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if (fRXPat->fInitialChars->contains((UChar32)0, (UChar32)0x10ffff) == FALSE &&
28566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fMinMatchLen > 0) {
28576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Matches start with a set of characters smaller than the set of all chars.
28586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fStartType = START_SET;
28596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
28606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Matches can start with anything
28616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fRXPat->fStartType = START_NO_INFO;
28626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
28636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return;
28656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
28666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
28706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
28716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   minMatchLength    Calculate the length of the shortest string that could
28726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     match the specified pattern.
28736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     Length is in 16 bit code units, not code points.
28746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
28756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     The calculated length may not be exact.  The returned
28766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     value may be shorter than the actual minimum; it must
28776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     never be longer.
28786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
28796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     start and end are the range of p-code operations to be
28806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     examined.  The endpoints are included in the range.
28816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
28826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
28836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t   RegexCompile::minMatchLength(int32_t start, int32_t end) {
28846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
28856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
28866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
28876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(start <= end);
28896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(end < fRXPat->fCompiledPat->size());
28906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    loc;
28936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    op;
28946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    opType;
28956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    currentLen = 0;
28966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
28986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // forwardedLength is a vector holding minimum-match-length values that
28996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   are propagated forward in the pattern by JMP or STATE_SAVE operations.
29006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   It must be one longer than the pattern being checked because some  ops
29016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   will jmp to a end-of-block+1 location from within a block, and we must
29026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   count those when checking the block.
29036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector32  forwardedLength(end+2, *fStatus);
29046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    forwardedLength.setSize(end+2);
29056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc=start; loc<=end+1; loc++) {
29066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        forwardedLength.setElementAt(INT32_MAX, loc);
29076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
29086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc = start; loc<=end; loc++) {
29106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
29116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        opType = URX_TYPE(op);
29126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The loop is advancing linearly through the pattern.
29146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // If the op we are now at was the destination of a branch in the pattern,
29156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // and that path has a shorter minimum length than the current accumulated value,
29166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // replace the current accumulated value.
29176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);  // MinLength == INT32_MAX for some
29186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                               //   no-match-possible cases.
29196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (forwardedLength.elementAti(loc) < currentLen) {
29206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = forwardedLength.elementAti(loc);
29216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);
29226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
29236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        switch (opType) {
29256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Ops that don't change the total length matched
29266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RESERVED_OP:
29276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END:
29286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_LEN:
29296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_NOP:
29306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_START_CAPTURE:
29316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END_CAPTURE:
29326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_B:
29336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_BU:
29346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_G:
29356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_Z:
29366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET:
29376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR:
29386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_M:
29396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_D:
29406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_MD:
29416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RELOC_OPRND:
29426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_INP_LOC:
29436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M:
29446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M_UNIX:
29456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
29466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF_I:
29476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_SP:          // Setup for atomic or possessive blocks.  Doesn't change what can match.
29496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LD_SP:
29506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV:
29526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV_X:
29536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Ops that match a minimum of one character (one or two 16 bit code units.)
29576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
29586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR:
29596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATIC_SETREF:
29606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STAT_SETREF_N:
29616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_SETREF:
29626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_D:
29636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR_I:
29646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_X:   // Grahpeme Cluster.  Minimum is 1, max unbounded.
29656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_ALL:    // . matches one or two.
29666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY:
29676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_UNIX:
29686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen++;
29696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMPX:
29736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            loc++;              // URX_JMPX has an extra operand, ignored here,
29746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                //   otherwise processed identically to URX_JMP.
29756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP:
29766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
29776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  jmpDest = URX_VAL(op);
29786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (jmpDest < loc) {
29796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Loop of some kind.  Can safely ignore, the worst that will happen
29806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    //  is that we understate the true minimum length
29816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    currentLen = forwardedLength.elementAti(loc+1);
29826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
29836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Forward jump.  Propagate the current min length to the target loc of the jump.
29846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(jmpDest <= end+1);
29856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (forwardedLength.elementAti(jmpDest) > currentLen) {
29866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        forwardedLength.setElementAt(currentLen, jmpDest);
29876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
29886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
29896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
29906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
29926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKTRACK:
29936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
29946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Back-tracks are kind of like a branch, except that the min length was
29956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   propagated already, by the state save.
29966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen = forwardedLength.elementAti(loc+1);
29976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
29986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
29996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATE_SAVE:
30026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
30036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // State Save, for forward jumps, propagate the current minimum.
30046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //             of the state save.
30056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  jmpDest = URX_VAL(op);
30066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (jmpDest > loc) {
30076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (currentLen < forwardedLength.elementAti(jmpDest)) {
30086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        forwardedLength.setElementAt(currentLen, jmpDest);
30096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
30106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
30116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
30126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
30136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING:
30166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
30176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                loc++;
30186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
30196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen += URX_VAL(stringLenOp);
30206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
30216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
30226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_I:
30256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
30266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                loc++;
30276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // TODO: with full case folding, matching input text may be shorter than
30286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //       the string we have here.  More smarts could put some bounds on it.
30296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //       Assume a min length of one for now.  A min length of zero causes
30306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //        optimization failures for a pattern like "string"+
30316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // currentLen += URX_VAL(stringLenOp);
30326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen += 1;
30336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
30346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
30356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT:
30376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT_NG:
30386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
30396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Loop Init Ops.
30406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   If the min loop count == 0
30416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      move loc forwards to the end of the loop, skipping over the body.
30426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   If the min count is > 0,
30436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //      continue normal processing of the body of the loop.
30446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t loopEndLoc   = (int32_t)fRXPat->fCompiledPat->elementAti(loc+1);
30456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        loopEndLoc   = URX_VAL(loopEndLoc);
30466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t minLoopCount = (int32_t)fRXPat->fCompiledPat->elementAti(loc+2);
30476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (minLoopCount == 0) {
30486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    loc = loopEndLoc;
30496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
30506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    loc+=3;  // Skips over operands of CTR_INIT
30516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
30526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
30536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
30546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP:
30576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP_NG:
30586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Loop ops.
30596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  The jump is conditional, backwards only.
30606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
30616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_SR_I:
30636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_DOT_I:
30646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_C:
30656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // More loop ops.  These state-save to themselves.
30666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   don't change the minimum match - could match nothing at all.
30676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
30686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
30706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_START:
30716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_START:
30726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
30736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Look-around.  Scan forward until the matching look-ahead end,
30746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   without processing the look-around block.  This is overly pessimistic for look-ahead,
30756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   it assumes that the look-ahead match might be zero-length.
30766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   TODO:  Positive lookahead could recursively do the block, then continue
30776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //          with the longer of the block or the value coming in.  Ticket 6060
30786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  depth = (opType == URX_LA_START? 2: 1);;
30796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
30806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    loc++;
30816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
30826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_LA_START) {
30836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // The boilerplate for look-ahead includes two LA_END insturctions,
30846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        //    Depth will be decremented by each one when it is seen.
30856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth += 2;
30866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
30876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_LB_START) {
30886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth++;
30896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
30906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_LA_END) {
30916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth--;
30926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (depth == 0) {
30936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
30946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
30956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
30966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op)==URX_LBN_END) {
30976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth--;
30986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (depth == 0) {
30996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
31006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
31016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
31026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_STATE_SAVE) {
31036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // Need this because neg lookahead blocks will FAIL to outside
31046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        //   of the block.
31056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        int32_t  jmpDest = URX_VAL(op);
31066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (jmpDest > loc) {
31076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if (currentLen < forwardedLength.elementAti(jmpDest)) {
31086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                forwardedLength.setElementAt(currentLen, jmpDest);
31096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
31106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
31116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
31126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(loc <= end);
31136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
31146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
31156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
31166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_END:
31186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_CONT:
31196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_END:
31206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_CONT:
31216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_END:
31226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Only come here if the matching URX_LA_START or URX_LB_START was not in the
31236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   range being sized, which happens when measuring size of look-behind blocks.
31246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
31256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        default:
31276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(FALSE);
31286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
31296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
31316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // We have finished walking through the ops.  Check whether some forward jump
31336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   propagated a shorter length to location end+1.
31346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (forwardedLength.elementAti(end+1) < currentLen) {
31356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        currentLen = forwardedLength.elementAti(end+1);
31366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(currentLen>=0 && currentLen < INT32_MAX);
31376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
31386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return currentLen;
31406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
31416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Saturating addition of two lengths.
//   val and delta are both expected to be positive.
//   Yields val + delta while that sum stays below INT32_MAX;
//   otherwise yields INT32_MAX instead of overflowing.

static int32_t safeIncrement(int32_t val, int32_t delta) {
    // Room left before val would reach INT32_MAX.
    const int32_t headroom = INT32_MAX - val;
    return (delta < headroom) ? (val + delta) : INT32_MAX;
}
31526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
31556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
31566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   maxMatchLength    Calculate the length of the longest string that could
31576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     match the specified pattern.
31586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     Length is in 16 bit code units, not code points.
31596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
31606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     The calculated length may not be exact.  The returned
31616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     value may be longer than the actual maximum; it must
31626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     never be shorter.
31636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
31646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
31656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t   RegexCompile::maxMatchLength(int32_t start, int32_t end) {
31666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
31676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
31686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
31696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(start <= end);
31706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(end < fRXPat->fCompiledPat->size());
31716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    loc;
31746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    op;
31756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    opType;
31766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    currentLen = 0;
31776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector32  forwardedLength(end+1, *fStatus);
31786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    forwardedLength.setSize(end+1);
31796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc=start; loc<=end; loc++) {
31816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        forwardedLength.setElementAt(0, loc);
31826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
31836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc = start; loc<=end; loc++) {
31856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
31866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        opType = URX_TYPE(op);
31876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The loop is advancing linearly through the pattern.
31896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // If the op we are now at was the destination of a branch in the pattern,
31906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // and that path has a longer maximum length than the current accumulated value,
31916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // replace the current accumulated value.
31926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (forwardedLength.elementAti(loc) > currentLen) {
31936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = forwardedLength.elementAti(loc);
31946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
31956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
31966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        switch (opType) {
31976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Ops that don't change the total length matched
31986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RESERVED_OP:
31996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END:
32006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_LEN:
32016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_NOP:
32026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_START_CAPTURE:
32036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END_CAPTURE:
32046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_B:
32056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_BU:
32066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_G:
32076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_Z:
32086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET:
32096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR:
32106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_M:
32116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_D:
32126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_MD:
32136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RELOC_OPRND:
32146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_INP_LOC:
32156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M:
32166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M_UNIX:
32176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_SP:          // Setup for atomic or possessive blocks.  Doesn't change what can match.
32196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LD_SP:
32206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_END:
32226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_CONT:
32236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_CONT:
32246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_END:
32256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Ops that cause an unbounded increase in the length
32296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   of a matched string, or that increase it in a hard to characterize way.
32306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   Call the max length unbounded, and stop further checking.
32316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
32326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF_I:
32336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_X:   // Grahpeme Cluster.  Minimum is 1, max unbounded.
32346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = INT32_MAX;
32356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Ops that match a max of one character (possibly two 16 bit code units.)
32396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
32406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATIC_SETREF:
32416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STAT_SETREF_N:
32426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_SETREF:
32436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_D:
32446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR_I:
32456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_ALL:
32466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY:
32476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_UNIX:
32486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = safeIncrement(currentLen, 2);
32496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Single literal character.  Increase current max length by one or two,
32526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //       depending on whether the char is in the supplementary range.
32536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR:
32546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = safeIncrement(currentLen, 1);
32556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (URX_VAL(op) > 0x10000) {
32566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen = safeIncrement(currentLen, 1);
32576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
32586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Jumps.
32616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
32626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP:
32636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMPX:
32646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV:
32656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV_X:
32666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
32676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  jmpDest = URX_VAL(op);
32686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (jmpDest < loc) {
32696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Loop of some kind.  Max match length is unbounded.
32706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    currentLen = INT32_MAX;
32716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
32726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Forward jump.  Propagate the current max length to the target loc of the jump.
32736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (forwardedLength.elementAti(jmpDest) < currentLen) {
32746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        forwardedLength.setElementAt(currentLen, jmpDest);
32756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
32766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    currentLen = 0;
32776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
32786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
32796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKTRACK:
32826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // back-tracks are kind of like a branch, except that the max length was
32836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   propagated already, by the state save.
32846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = forwardedLength.elementAti(loc+1);
32856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
32866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
32886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATE_SAVE:
32896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
32906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // State Save, for forward jumps, propagate the current maximum
32916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //               to the destination of the state save.
32926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //             For backwards jumps, they create a loop, maximum
32936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //               match length is unbounded.
32946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  jmpDest = URX_VAL(op);
32956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (jmpDest > loc) {
32966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (currentLen > forwardedLength.elementAti(jmpDest)) {
32976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        forwardedLength.setElementAt(currentLen, jmpDest);
32986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
32996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
33006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    currentLen = INT32_MAX;
33016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
33046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING:
33096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
33106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                loc++;
33116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
33126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
33136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
33146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_I:
33176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // TODO:  This code assumes that any user string that matches will be no longer
33186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        than our compiled string, with case insensitive matching.
33196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        Our compiled string has been case-folded already.
33206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
33216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        Any matching user string will have no more code points than our
33226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        compiled (folded) string.  Folding may add code points, but
33236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        not remove them.
33246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
33256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        There is a potential problem if a supplemental code point
33266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        case-folds to a BMP code point.  In this case our compiled string
33276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        could be shorter (in code units) than a matching user string.
33286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
33296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        At this time (Unicode 6.1) there are no such characters, and this case
33306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        is not being handled.  A test, intltest regex/Bug9283, will fail if
33316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        any problematic characters are added to Unicode.
33326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
33336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        If this happens, we can make a set of the BMP chars that the
33346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        troublesome supplementals fold to, scan our string, and bump the
33356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //        currentLen one extra for each that is found.
33366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
33376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
33386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                loc++;
33396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
33406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
33416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
33436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT:
33456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT_NG:
33466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // For Loops, recursively call this function on the pattern for the loop body,
33476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   then multiply the result by the maximum loop count.
33486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
33496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  loopEndLoc = URX_VAL(fRXPat->fCompiledPat->elementAti(loc+1));
33506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (loopEndLoc == loc+4) {
33516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Loop has an empty body. No effect on max match length.
33526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Continue processing with code after the loop end.
33536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    loc = loopEndLoc;
33546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
33556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t maxLoopCount = fRXPat->fCompiledPat->elementAti(loc+3);
33586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (maxLoopCount == -1) {
33596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Unbounded Loop. No upper bound on match length.
33606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    currentLen = INT32_MAX;
33616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
33626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(loopEndLoc >= loc+4);
33656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  blockLen = maxMatchLength(loc+4, loopEndLoc-1);  // Recursive call.
33666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (blockLen == INT32_MAX) {
33676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    currentLen = blockLen;
33686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
33696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
33706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentLen += blockLen * maxLoopCount;
33716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                loc = loopEndLoc;
33726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
33736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
33746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP:
33766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP_NG:
33776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // These opcodes will be skipped over by code for URX_CTR_INIT.
33786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // We shouldn't encounter them here.
33796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(FALSE);
33806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
33816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_SR_I:
33836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_DOT_I:
33846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_C:
33856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // For anything to do with loops, make the match length unbounded.
33866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            currentLen = INT32_MAX;
33876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
33886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_START:
33926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_END:
33936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Look-ahead.  Just ignore, treat the look-ahead block as if
33946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // it were normal pattern.  Gives a too-long match length,
33956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  but good enough for now.
33966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
33976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
33986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // End of look-ahead ops should always be consumed by the processing at
33996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  the URX_LA_START op.
34006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // U_ASSERT(FALSE);
34016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // break;
34026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_START:
34046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
34056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Look-behind.  Scan forward until the matching look-around end,
34066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   without processing the look-behind block.
34076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  depth = 0;
34086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (;;) {
34096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    loc++;
34106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
34116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) {
34126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth++;
34136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
34146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (URX_TYPE(op) == URX_LA_END || URX_TYPE(op)==URX_LBN_END) {
34156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (depth == 0) {
34166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
34176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
34186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        depth--;
34196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
34206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U_ASSERT(loc < end);
34216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
34226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
34236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
34246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        default:
34266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(FALSE);
34276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
34286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (currentLen == INT32_MAX) {
34316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  The maximum length is unbounded.
34326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  Stop further processing of the pattern.
34336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
34346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
34356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return currentLen;
34386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
34406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
34436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
34446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   stripNOPs    Remove any NOP operations from the compiled pattern code.
34456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                Extra NOPs are inserted for some constructs during the initial
34466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                code generation to provide locations that may be patched later.
34476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                Many end up unneeded, and are removed by this function.
34486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
34496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                In order to minimize the number of passes through the pattern,
34506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                back-reference fixup is also performed here (adjusting
34516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                back-reference operands to point to the correct frame offsets).
34526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
34536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
34546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexCompile::stripNOPs() {
34556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
34576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
34586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t    end = fRXPat->fCompiledPat->size();
34616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector32  deltas(end, *fStatus);
34626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Make a first pass over the code, computing the amount that things
34646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   will be offset at each location in the original code.
34656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   loc;
34666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t   d = 0;
34676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (loc=0; loc<end; loc++) {
34686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        deltas.addElement(d, *fStatus);
34696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
34706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (URX_TYPE(op) == URX_NOP) {
34716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            d++;
34726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
34736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
34746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString caseStringBuffer;
34766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Make a second pass over the code, removing the NOPs by moving following
34786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  code up, and patching operands that refer to code locations that
34796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  are being moved.  The array of offsets from the first step is used
34806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  to compute the new operand values.
34816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t src;
34826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t dst = 0;
34836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (src=0; src<end; src++) {
34846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(src);
34856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t opType = URX_TYPE(op);
34866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        switch (opType) {
34876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_NOP:
34886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
34896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
34906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATE_SAVE:
34916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP:
34926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP:
34936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_LOOP_NG:
34946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RELOC_OPRND:
34956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMPX:
34966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV:
34976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_JMP_SAV_X:
34986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // These are instructions with operands that refer to code locations.
34996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
35006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t  operandAddress = URX_VAL(op);
35016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(operandAddress>=0 && operandAddress<deltas.size());
35026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t fixedOperandAddress = operandAddress - deltas.elementAti(operandAddress);
35036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op = URX_BUILD(opType, fixedOperandAddress);
35046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fCompiledPat->setElementAt(op, dst);
35056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                dst++;
35066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
35076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF:
35106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKREF_I:
35116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
35126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t where = URX_VAL(op);
35136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (where > fRXPat->fGroupMap->size()) {
35146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    error(U_REGEX_INVALID_BACK_REF);
35156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
35166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
35176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                where = fRXPat->fGroupMap->elementAti(where-1);
35186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                op    = URX_BUILD(opType, where);
35196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fCompiledPat->setElementAt(op, dst);
35206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                dst++;
35216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fRXPat->fNeedsAltInput = TRUE;
35236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
35246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
35256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RESERVED_OP:
35266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_RESERVED_OP_N:
35276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKTRACK:
35286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END:
35296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR:
35306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING:
35316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_LEN:
35326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_START_CAPTURE:
35336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_END_CAPTURE:
35346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STATIC_SETREF:
35356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STAT_SETREF_N:
35366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_SETREF:
35376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY:
35386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_FAIL:
35396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_B:
35406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_BU:
35416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_G:
35426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_X:
35436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_Z:
35446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_ALL:
35456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_BACKSLASH_D:
35466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET:
35476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR:
35486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT:
35496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CTR_INIT_NG:
35506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOTANY_UNIX:
35516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_SP:
35526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LD_SP:
35536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STO_INP_LOC:
35546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_START:
35556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LA_END:
35566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_ONECHAR_I:
35576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_STRING_I:
35586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_M:
35596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M:
35606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_CARET_M_UNIX:
35616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_START:
35626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_CONT:
35636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LB_END:
35646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_CONT:
35656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LBN_END:
35666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_SR_I:
35676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_DOT_I:
35686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_LOOP_C:
35696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_D:
35706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        case URX_DOLLAR_MD:
35716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // These instructions are unaltered by the relocation.
35726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRXPat->fCompiledPat->setElementAt(op, dst);
35736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dst++;
35746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
35756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        default:
35776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Some op is unaccounted for.
35786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U_ASSERT(FALSE);
35796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            error(U_REGEX_INTERNAL_ERROR);
35806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
35816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
35826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRXPat->fCompiledPat->setSize(dst);
35846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
35856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
35896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
35906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
35916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Error         Report a rule parse error.
35926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                Only report it if no previous error has been recorded.
35936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
35946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
35956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexCompile::error(UErrorCode e) {
35966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(*fStatus)) {
35976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *fStatus = e;
35986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Hmm. fParseErr (UParseError) line & offset fields are int32_t in public
35996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // API (see common/unicode/parseerr.h), while fLineNum and fCharNum are
36006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // int64_t. If the values of the latter are out of range for the former,
36016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // set them to the appropriate "field not supported" values.
36026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fLineNum > 0x7FFFFFFF) {
36036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParseErr->line   = 0;
36046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParseErr->offset = -1;
36056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if (fCharNum > 0x7FFFFFFF) {
36066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParseErr->line   = (int32_t)fLineNum;
36076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParseErr->offset = -1;
36086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
36096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParseErr->line   = (int32_t)fLineNum;
36106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fParseErr->offset = (int32_t)fCharNum;
36116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
36126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR; // throwaway status for extracting context
36146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Fill in the context.
36166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   Note: extractBetween() pins supplied indicies to the string bounds.
36176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_memset(fParseErr->preContext,  0, sizeof(fParseErr->preContext));
36186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_memset(fParseErr->postContext, 0, sizeof(fParseErr->postContext));
36196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_extract(fRXPat->fPattern, fScanIndex-U_PARSE_CONTEXT_LEN+1, fScanIndex, fParseErr->preContext, U_PARSE_CONTEXT_LEN, &status);
36206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utext_extract(fRXPat->fPattern, fScanIndex, fScanIndex+U_PARSE_CONTEXT_LEN-1, fParseErr->postContext, U_PARSE_CONTEXT_LEN, &status);
36216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
36236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
36266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  Assorted Unicode character constants.
36276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     Numeric because there is no portable way to enter them as literals.
36286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     (Think EBCDIC).
36296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
36306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chCR        = 0x0d;      // New lines, for terminating comments.
36316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chLF        = 0x0a;      // Line Feed
36326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chPound     = 0x23;      // '#', introduces a comment.
36336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chDigit0    = 0x30;      // '0'
36346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chDigit7    = 0x37;      // '9'
36356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chColon     = 0x3A;      // ':'
36366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chE         = 0x45;      // 'E'
36376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chQ         = 0x51;      // 'Q'
36386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//static const UChar      chN         = 0x4E;      // 'N'
36396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chP         = 0x50;      // 'P'
36406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chBackSlash = 0x5c;      // '\'  introduces a char escape
36416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//static const UChar      chLBracket  = 0x5b;      // '['
36426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chRBracket  = 0x5d;      // ']'
36436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chUp        = 0x5e;      // '^'
36446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chLowerP    = 0x70;
36456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chLBrace    = 0x7b;      // '{'
36466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chRBrace    = 0x7d;      // '}'
36476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chNEL       = 0x85;      //    NEL newline variant
36486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar      chLS        = 0x2028;    //    Unicode Line Separator
36496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
36526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
36536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  nextCharLL    Low Level Next Char from the regex pattern.
36546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                Get a char from the string, keep track of input position
36556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                     for error reporting.
36566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
36576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
36586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32  RegexCompile::nextCharLL() {
36596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32       ch;
36606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fPeekChar != -1) {
36626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = fPeekChar;
36636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fPeekChar = -1;
36646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return ch;
36656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // assume we're already in the right place
36686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ch = UTEXT_NEXT32(fRXPat->fPattern);
36696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (ch == U_SENTINEL) {
36706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return ch;
36716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (ch == chCR ||
36746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch == chNEL ||
36756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch == chLS   ||
36766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (ch == chLF && fLastChar != chCR)) {
36776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Character is starting a new line.  Bump up the line number, and
36786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  reset the column to 0.
36796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLineNum++;
36806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fCharNum=0;
36816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else {
36836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Character is not starting a new line.  Except in the case of a
36846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   LF following a CR, increment the column position.
36856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (ch != chLF) {
36866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fCharNum++;
36876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
36886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
36896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fLastChar = ch;
36906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return ch;
36916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
36926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
36936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
36946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
36956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   peekCharLL    Low Level Character Scanning, sneak a peek at the next
36966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                 character without actually getting it.
36976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
36986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
36996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32  RegexCompile::peekCharLL() {
37006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fPeekChar == -1) {
37016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fPeekChar = nextCharLL();
37026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
37036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fPeekChar;
37046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
37056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
37086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
37096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//   nextChar     for pattern scanning.  At this level, we handle stripping
37106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                out comments and processing some backslash character escapes.
37116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                The rest of the pattern grammar is handled at the next level up.
37126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
37136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
37146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexCompile::nextChar(RegexPatternChar &c) {
37156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fScanIndex = UTEXT_GETNATIVEINDEX(fRXPat->fPattern);
37176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    c.fChar    = nextCharLL();
37186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    c.fQuoted  = FALSE;
37196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fQuoteMode) {
37216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c.fQuoted = TRUE;
37226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((c.fChar==chBackSlash && peekCharLL()==chE && ((fModeFlags & UREGEX_LITERAL) == 0)) ||
37236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c.fChar == (UChar32)-1) {
37246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fQuoteMode = FALSE;  //  Exit quote mode,
37256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            nextCharLL();        // discard the E
37266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            nextChar(c);         // recurse to get the real next char
37276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
37286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
37296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else if (fInBackslashQuote) {
37306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The current character immediately follows a '\'
37316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Don't check for any further escapes, just return it as-is.
37326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Don't set c.fQuoted, because that would prevent the state machine from
37336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    dispatching on the character.
37346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInBackslashQuote = FALSE;
37356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
37366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else
37376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
37386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We are not in a \Q quoted region \E of the source.
37396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
37406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fModeFlags & UREGEX_COMMENTS) {
37416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //
37426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // We are in free-spacing and comments mode.
37436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  Scan through any white space and comments, until we
37446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //  reach a significant character or the end of inut.
37456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for (;;) {
37466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (c.fChar == (UChar32)-1) {
37476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;     // End of Input
37486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
37496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if  (c.fChar == chPound && fEOLComments == TRUE) {
37506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Start of a comment.  Consume the rest of it, until EOF or a new line
37516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    for (;;) {
37526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c.fChar = nextCharLL();
37536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (c.fChar == (UChar32)-1 ||  // EOF
37546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            c.fChar == chCR        ||
37556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            c.fChar == chLF        ||
37566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            c.fChar == chNEL       ||
37576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            c.fChar == chLS)       {
37586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            break;
37596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
37606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
37616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
37626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // TODO:  check what Java & Perl do with non-ASCII white spaces.  Ticket 6061.
37636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (PatternProps::isWhiteSpace(c.fChar) == FALSE) {
37646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
37656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
37666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c.fChar = nextCharLL();
37676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
37686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
37696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
37716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  check for backslash escaped characters.
37726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
37736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (c.fChar == chBackSlash) {
37746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int64_t pos = UTEXT_GETNATIVEINDEX(fRXPat->fPattern);
37756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (RegexStaticSets::gStaticSets->fUnescapeCharSet.contains(peekCharLL())) {
37766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //
37776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // A '\' sequence that is handled by ICU's standard unescapeAt function.
37786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Includes \uxxxx, \n, \r, many others.
37796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //   Return the single equivalent character.
37806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //
37816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                nextCharLL();                 // get & discard the peeked char.
37826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c.fQuoted = TRUE;
37836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (UTEXT_FULL_TEXT_IN_CHUNK(fRXPat->fPattern, fPatternLength)) {
37856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t endIndex = (int32_t)pos;
37866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c.fChar = u_unescapeAt(uregex_ucstr_unescape_charAt, &endIndex, (int32_t)fPatternLength, (void *)fRXPat->fPattern->chunkContents);
37876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (endIndex == pos) {
37896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        error(U_REGEX_BAD_ESCAPE_SEQUENCE);
37906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
37916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fCharNum += endIndex - pos;
37926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UTEXT_SETNATIVEINDEX(fRXPat->fPattern, endIndex);
37936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
37946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t offset = 0;
37956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(fRXPat->fPattern);
37966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
37976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UTEXT_SETNATIVEINDEX(fRXPat->fPattern, pos);
37986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c.fChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context);
37996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (offset == 0) {
38016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        error(U_REGEX_BAD_ESCAPE_SEQUENCE);
38026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else if (context.lastOffset == offset) {
38036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UTEXT_PREVIOUS32(fRXPat->fPattern);
38046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else if (context.lastOffset != offset-1) {
38056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utext_moveIndex32(fRXPat->fPattern, offset - context.lastOffset - 1);
38066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
38076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fCharNum += offset;
38086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
38096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
38106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (peekCharLL() == chDigit0) {
38116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  Octal Escape, using Java Regexp Conventions
38126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    which are \0 followed by 1-3 octal digits.
38136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    Different from ICU Unescape handling of Octal, which does not
38146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    require the leading 0.
38156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  Java also has the convention of only consuming 2 octal digits if
38166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    the three digit number would be > 0xff
38176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //
38186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c.fChar = 0;
38196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                nextCharLL();    // Consume the initial 0.
38206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int index;
38216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (index=0; index<3; index++) {
38226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t ch = peekCharLL();
38236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (ch<chDigit0 || ch>chDigit7) {
38246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if (index==0) {
38256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           // \0 is not followed by any octal digits.
38266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           error(U_REGEX_BAD_ESCAPE_SEQUENCE);
38276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
38286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
38296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
38306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c.fChar <<= 3;
38316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c.fChar += ch&7;
38326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (c.fChar <= 255) {
38336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        nextCharLL();
38346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
38356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // The last digit made the number too big.  Forget we saw it.
38366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c.fChar >>= 3;
38376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
38386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
38396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c.fQuoted = TRUE;
38406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
38416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else if (peekCharLL() == chQ) {
38426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  "\Q"  enter quote mode, which will continue until "\E"
38436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fQuoteMode = TRUE;
38446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                nextCharLL();       // discard the 'Q'.
38456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                nextChar(c);        // recurse to get the real next char.
38466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
38476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else
38486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
38496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // We are in a '\' escape that will be handled by the state table scanner.
38506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Just return the backslash, but remember that the following char is to
38516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //  be taken literally.
38526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fInBackslashQuote = TRUE;
38536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
38546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
38556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // re-enable # to end-of-line comments, in case they were disabled.
38586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // They are disabled by the parser upon seeing '(?', but this lasts for
38596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  the fetching of the next character only.
38606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fEOLComments = TRUE;
38616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // putc(c.fChar, stdout);
38636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
38646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
38686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
38696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  scanNamedChar
38706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org //            Get a UChar32 from a \N{UNICODE CHARACTER NAME} in the pattern.
38716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
38726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             The scan position will be at the 'N'.  On return
38736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             the scan position should be just after the '}'
38746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
38756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             Return the UChar32
38766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
38776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
38786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32  RegexCompile::scanNamedChar() {
38796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
38806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
38816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextChar(fC);
38846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fC.fChar != chLBrace) {
38856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_PROPERTY_SYNTAX);
38866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
38876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
38886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
38896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString  charName;
38906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
38916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextChar(fC);
38926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fC.fChar == chRBrace) {
38936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
38946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
38956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fC.fChar == -1) {
38966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            error(U_REGEX_PROPERTY_SYNTAX);
38976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
38986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
38996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        charName.append(fC.fChar);
39006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char name[100];
39036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!uprv_isInvariantUString(charName.getBuffer(), charName.length()) ||
39046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         (uint32_t)charName.length()>=sizeof(name)) {
39056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // All Unicode character names have only invariant characters.
39066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The API to get a character, given a name, accepts only char *, forcing us to convert,
39076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   which requires this error check
39086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_PROPERTY_SYNTAX);
39096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
39106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    charName.extract(0, charName.length(), name, sizeof(name), US_INV);
39126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32  theChar = u_charFromName(U_UNICODE_CHAR_NAME, name, fStatus);
39146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
39156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_PROPERTY_SYNTAX);
39166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextChar(fC);      // Continue overall regex pattern processing with char after the '}'
39196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return theChar;
39206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
39216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
39236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  scanProp   Construct a UnicodeSet from the text at the current scan
39256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             position, which will be of the form \p{whaterver}
39266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             The scan position will be at the 'p' or 'P'.  On return
39286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             the scan position should be just after the '}'
39296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             Return a UnicodeSet, constructed from the \P pattern,
39316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             or NULL if the pattern is invalid.
39326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
39346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet *RegexCompile::scanProp() {
39356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeSet    *uset = NULL;
39366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
39386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
39396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(fC.fChar == chLowerP || fC.fChar == chP);
39416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool negated = (fC.fChar == chP);
39426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString propertyName;
39446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextChar(fC);
39456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fC.fChar != chLBrace) {
39466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        error(U_REGEX_PROPERTY_SYNTAX);
39476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
39486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
39506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextChar(fC);
39516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fC.fChar == chRBrace) {
39526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
39536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
39546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fC.fChar == -1) {
39556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Hit the end of the input string without finding the closing '}'
39566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            error(U_REGEX_PROPERTY_SYNTAX);
39576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return NULL;
39586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
39596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        propertyName.append(fC.fChar);
39606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uset = createSetForProperty(propertyName, negated);
39626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextChar(fC);    // Move input scan to position following the closing '}'
39636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return uset;
39646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
39656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
39676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  scanPosixProp   Construct a UnicodeSet from the text at the current scan
39696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             position, which is expected be of the form [:property expression:]
39706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             The scan position will be at the opening ':'.  On return
39726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             the scan position must be on the closing ']'
39736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             Return a UnicodeSet constructed from the pattern,
39756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             or NULL if this is not a valid POSIX-style set expression.
39766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//             If not a property expression, restore the initial scan position
39776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                (to the opening ':')
39786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//               Note:  the opening '[:' is not sufficient to guarantee that
39806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                      this is a [:property:] expression.
39816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                      [:'+=,] is a perfectly good ordinary set expression that
39826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//                              happens to include ':' as one of its characters.
39836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
39846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------------
39856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet *RegexCompile::scanPosixProp() {
39866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeSet    *uset = NULL;
39876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
39896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
39906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
39916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(fC.fChar == chColon);
39936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
39946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Save the scanner state.
39956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // TODO:  move this into the scanner, with the state encapsulated in some way.  Ticket 6062
39966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t     savedScanIndex        = fScanIndex;
39976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t     savedNextIndex        = UTEXT_GETNATIVEINDEX(fRXPat->fPattern);
39986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool       savedQuoteMode        = fQuoteMode;
39996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool       savedInBackslashQuote = fInBackslashQuote;
40006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool       savedEOLComments      = fEOLComments;
40016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t     savedLineNum          = fLineNum;
40026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int64_t     savedCharNum          = fCharNum;
40036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32     savedLastChar         = fLastChar;
40046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32     savedPeekChar         = fPeekChar;
40056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    RegexPatternChar savedfC          = fC;
40066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Scan for a closing ].   A little tricky because there are some perverse
40086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   edge cases possible.  "[:abc\Qdef:] \E]"  is a valid non-property expression,
40096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   ending on the second closing ].
40106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString propName;
40126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool         negated  = FALSE;
40136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Check for and consume the '^' in a negated POSIX property, e.g.  [:^Letter:]
40156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextChar(fC);
40166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fC.fChar == chUp) {
40176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       negated = TRUE;
40186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       nextChar(fC);
40196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
40206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Scan for the closing ":]", collecting the property name along the way.
40226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool  sawPropSetTerminator = FALSE;
40236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
40246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        propName.append(fC.fChar);
40256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextChar(fC);
40266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fC.fQuoted || fC.fChar == -1) {
40276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Escaped characters or end of input - either says this isn't a [:Property:]
40286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
40296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (fC.fChar == chColon) {
40316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            nextChar(fC);
40326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fC.fChar == chRBracket) {
40336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                sawPropSetTerminator = TRUE;
40346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
40356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
40366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
40376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
40386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (sawPropSetTerminator) {
40406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uset = createSetForProperty(propName, negated);
40416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
40426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else
40436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
40446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // No closing ":]".
40456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Restore the original scan position.
40466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  The main scanner will retry the input as a normal set expression,
40476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    not a [:Property:] expression.
40486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fScanIndex        = savedScanIndex;
40496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fQuoteMode        = savedQuoteMode;
40506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fInBackslashQuote = savedInBackslashQuote;
40516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fEOLComments      = savedEOLComments;
40526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLineNum          = savedLineNum;
40536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fCharNum          = savedCharNum;
40546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fLastChar         = savedLastChar;
40556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fPeekChar         = savedPeekChar;
40566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fC                = savedfC;
40576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UTEXT_SETNATIVEINDEX(fRXPat->fPattern, savedNextIndex);
40586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
40596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return uset;
40606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
40616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) {
40636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    set->add(0, 8).add(0x0e, 0x1b).add(0x7f, 0x9f);
40646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    addCategory(set, U_GC_CF_MASK, ec);
40656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
40666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
//
//  Create a Unicode Set from a Unicode Property expression.
//     This is common code underlying both \p{...} and [:...:] expressions.
//     Includes trying the Java "properties" that aren't supported as
//     normal ICU UnicodeSet properties
//
40736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar posSetPrefix[] = {0x5b, 0x5c, 0x70, 0x7b, 0}; // "[\p{"
40746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar negSetPrefix[] = {0x5b, 0x5c, 0x50, 0x7b, 0}; // "[\P{"
40756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UBool negated) {
40766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString   setExpr;
40776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeSet      *set;
40786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t        usetFlags = 0;
40796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(*fStatus)) {
40816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
40826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
40836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
40846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
40856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  First try the property as we received it
40866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
40876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (negated) {
40886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setExpr.append(negSetPrefix, -1);
40896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
40906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setExpr.append(posSetPrefix, -1);
40916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
40926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setExpr.append(propName);
40936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setExpr.append(chRBrace);
40946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setExpr.append(chRBracket);
40956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
40966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        usetFlags |= USET_CASE_INSENSITIVE;
40976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
40986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    set = new UnicodeSet(setExpr, usetFlags, NULL, *fStatus);
40996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(*fStatus)) {
41006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org       return set;
41016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
41026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete set;
41036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    set = NULL;
41046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
41066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  The property as it was didn't work.
41076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //  Do [:word:]. It is not recognized as a property by UnicodeSet.  "word" not standard POSIX
41096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //     or standard Java, but many other regular expression packages do recognize it.
41106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (propName.caseCompare(UNICODE_STRING_SIMPLE("word"), 0) == 0) {
41126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *fStatus = U_ZERO_ERROR;
41136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set = new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET]));
41146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (set == NULL) {
41156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *fStatus = U_MEMORY_ALLOCATION_ERROR;
41166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return set;
41176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (negated) {
41196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->complement();
41206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return set;
41226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
41236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    Do Java fixes -
41266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //       InGreek -> InGreek or Coptic, that being the official Unicode name for that block.
41276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //       InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols.
41286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
41296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //       Note on Spaces:  either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols"
41306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //                        is accepted by Java.  The property part of the name is compared
41316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //                        case-insenstively.  The spaces must be exactly as shown, either
41326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //                        all there, or all omitted, with exactly one at each position
41336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //                        if they are present.  From checking against JDK 1.6
41346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
41356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //       This code should be removed when ICU properties support the Java  compatibility names
41366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //          (ICU 4.0?)
41376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
41386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString mPropName = propName;
41396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (mPropName.caseCompare(UNICODE_STRING_SIMPLE("InGreek"), 0) == 0) {
41406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        mPropName = UNICODE_STRING_SIMPLE("InGreek and Coptic");
41416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
41426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (mPropName.caseCompare(UNICODE_STRING_SIMPLE("InCombining Marks for Symbols"), 0) == 0 ||
41436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        mPropName.caseCompare(UNICODE_STRING_SIMPLE("InCombiningMarksforSymbols"), 0) == 0) {
41446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        mPropName = UNICODE_STRING_SIMPLE("InCombining Diacritical Marks for Symbols");
41456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
41466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
41476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        mPropName = UNICODE_STRING_SIMPLE("javaValidCodePoint");
41486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
41496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    See if the property looks like a Java "InBlockName", which
41516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //    we will recast as "Block=BlockName"
41526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
41536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const UChar IN[] = {0x49, 0x6E, 0};  // "In"
41546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const UChar BLOCK[] = {0x42, 0x6C, 0x6f, 0x63, 0x6b, 0x3d, 00};  // "Block="
41556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (mPropName.startsWith(IN, 2) && propName.length()>=3) {
41566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setExpr.truncate(4);   // Leaves "[\p{", or "[\P{"
41576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setExpr.append(BLOCK, -1);
41586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setExpr.append(UnicodeString(mPropName, 2));  // Property with the leading "In" removed.
41596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setExpr.append(chRBrace);
41606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setExpr.append(chRBracket);
41616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *fStatus = U_ZERO_ERROR;
41626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set = new UnicodeSet(setExpr, usetFlags, NULL, *fStatus);
41636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_SUCCESS(*fStatus)) {
41646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return set;
41656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete set;
41676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set = NULL;
41686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
41696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
41706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (propName.startsWith(UNICODE_STRING_SIMPLE("java")) ||
41716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        propName.compare(UNICODE_STRING_SIMPLE("all")) == 0)
41726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
41736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode localStatus = U_ZERO_ERROR;
41746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //setExpr.remove();
41756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set = new UnicodeSet();
41766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
41776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //  Try the various Java specific properties.
41786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   These all begin with "java"
41796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //
41806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (mPropName.compare(UNICODE_STRING_SIMPLE("javaDefined")) == 0) {
41816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_CN_MASK, localStatus);
41826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->complement();
41836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaDigit")) == 0) {
41856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_ND_MASK, localStatus);
41866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaIdentifierIgnorable")) == 0) {
41886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addIdentifierIgnorable(set, localStatus);
41896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaISOControl")) == 0) {
41916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->add(0, 0x1F).add(0x7F, 0x9F);
41926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
41936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaJavaIdentifierPart")) == 0) {
41946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_L_MASK, localStatus);
41956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_SC_MASK, localStatus);
41966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_PC_MASK, localStatus);
41976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_ND_MASK, localStatus);
41986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_NL_MASK, localStatus);
41996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_MC_MASK, localStatus);
42006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_MN_MASK, localStatus);
42016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addIdentifierIgnorable(set, localStatus);
42026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaJavaIdentifierStart")) == 0) {
42046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_L_MASK, localStatus);
42056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_NL_MASK, localStatus);
42066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_SC_MASK, localStatus);
42076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_PC_MASK, localStatus);
42086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLetter")) == 0) {
42106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_L_MASK, localStatus);
42116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLetterOrDigit")) == 0) {
42136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_L_MASK, localStatus);
42146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_ND_MASK, localStatus);
42156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLowerCase")) == 0) {
42176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_LL_MASK, localStatus);
42186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaMirrored")) == 0) {
42206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->applyIntPropertyValue(UCHAR_BIDI_MIRRORED, 1, localStatus);
42216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaSpaceChar")) == 0) {
42236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_Z_MASK, localStatus);
42246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaSupplementaryCodePoint")) == 0) {
42266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->add(0x10000, UnicodeSet::MAX_VALUE);
42276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaTitleCase")) == 0) {
42296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_LT_MASK, localStatus);
42306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUnicodeIdentifierStart")) == 0) {
42326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_L_MASK, localStatus);
42336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_NL_MASK, localStatus);
42346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUnicodeIdentifierPart")) == 0) {
42366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_L_MASK, localStatus);
42376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_PC_MASK, localStatus);
42386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_ND_MASK, localStatus);
42396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_NL_MASK, localStatus);
42406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_MC_MASK, localStatus);
42416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_MN_MASK, localStatus);
42426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addIdentifierIgnorable(set, localStatus);
42436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUpperCase")) == 0) {
42456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_LU_MASK, localStatus);
42466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaValidCodePoint")) == 0) {
42486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->add(0, UnicodeSet::MAX_VALUE);
42496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaWhitespace")) == 0) {
42516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addCategory(set, U_GC_Z_MASK, localStatus);
42526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->removeAll(UnicodeSet().add(0xa0).add(0x2007).add(0x202f));
42536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->add(9, 0x0d).add(0x1c, 0x1f);
42546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) {
42566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set->add(0, UnicodeSet::MAX_VALUE);
42576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_SUCCESS(localStatus) && !set->isEmpty()) {
42606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *fStatus = U_ZERO_ERROR;
42616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (usetFlags & USET_CASE_INSENSITIVE) {
42626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                set->closeOver(USET_CASE_INSENSITIVE);
42636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (negated) {
42656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                set->complement();
42666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
42676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return set;
42686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete set;
42706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set = NULL;
42716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
42726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    error(*fStatus);
42736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return NULL;
42746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
42756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
42786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
42796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//  SetEval   Part of the evaluation of [set expressions].
42806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//            Perform any pending (stacked) operations with precedence
42816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//            equal or greater to that of the next operator encountered
42826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//            in the expression.
42836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//
42846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexCompile::setEval(int32_t nextOp) {
42856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeSet *rightOperand = NULL;
42866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeSet *leftOperand  = NULL;
42876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
42886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fSetOpStack.empty()==FALSE);
42896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t pendingSetOperation = fSetOpStack.peeki();
42906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((pendingSetOperation&0xffff0000) < (nextOp&0xffff0000)) {
42916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
42926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
42936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fSetOpStack.popi();
42946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(fSetStack.empty() == FALSE);
42956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        rightOperand = (UnicodeSet *)fSetStack.peek();
42966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        switch (pendingSetOperation) {
42976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case setNegation:
42986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                rightOperand->complement();
42996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
43006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case setCaseClose:
43016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // TODO: need a simple close function.  Ticket 6065
43026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                rightOperand->closeOver(USET_CASE_INSENSITIVE);
43036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                rightOperand->removeAllStrings();
43046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
43056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case setDifference1:
43066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case setDifference2:
43076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fSetStack.pop();
43086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leftOperand = (UnicodeSet *)fSetStack.peek();
43096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leftOperand->removeAll(*rightOperand);
43106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                delete rightOperand;
43116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
43126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case setIntersection1:
43136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case setIntersection2:
43146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fSetStack.pop();
43156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leftOperand = (UnicodeSet *)fSetStack.peek();
43166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leftOperand->retainAll(*rightOperand);
43176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                delete rightOperand;
43186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
43196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            case setUnion:
43206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fSetStack.pop();
43216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leftOperand = (UnicodeSet *)fSetStack.peek();
43226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leftOperand->addAll(*rightOperand);
43236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                delete rightOperand;
43246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
43256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            default:
43266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U_ASSERT(FALSE);
43276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
43286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
43296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
43306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
43316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexCompile::setPushOp(int32_t op) {
43336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setEval(op);
43346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fSetOpStack.push(op, *fStatus);
43356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fSetStack.push(new UnicodeSet(), *fStatus);
43366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
43376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
43386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
43396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
43406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4341