10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
4c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//  file:  repattrn.cpp
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru***************************************************************************
88de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert*   Copyright (C) 2002-2016 International Business Machines Corporation
98de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert*   and others. All rights reserved.
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru***************************************************************************
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uclean.h"
198de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert#include "cmemory.h"
208de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert#include "cstr.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h"
221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#include "uhash.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h"
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uvectr64.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexcmp.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regeximp.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h"
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//    RegexPattern    Default Constructor
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::RegexPattern() {
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Init all of this instances data.
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    init();
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Copy Constructor        Note:  This is a rather inefficient implementation,
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                                  but it probably doesn't matter.
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
50c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init();
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *this = other;
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Assignment Operator
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern &RegexPattern::operator = (const RegexPattern &other) {
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (this == &other) {
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Source and destination are the same.  Don't do anything.
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Clean out any previous contents of object being assigned to.
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    zap();
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Give target object a default initialization
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    init();
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Copy simple fields
741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    fDeferredStatus   = other.fDeferredStatus;
751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(fDeferredStatus)) {
771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return *this;
781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (other.fPatternString == NULL) {
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fPatternString = NULL;
821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fPatternString = new UnicodeString(*(other.fPatternString));
851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if (fPatternString == NULL) {
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
871b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        } else {
881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(fDeferredStatus)) {
921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return *this;
931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fFlags            = other.fFlags;
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fLiteralText      = other.fLiteralText;
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fMinMatchLen      = other.fMinMatchLen;
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fFrameSize        = other.fFrameSize;
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fDataSize         = other.fDataSize;
100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fStaticSets       = other.fStaticSets;
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fStaticSets8      = other.fStaticSets8;
102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fStartType        = other.fStartType;
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialStringIdx = other.fInitialStringIdx;
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialStringLen = other.fInitialStringLen;
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *fInitialChars    = *other.fInitialChars;
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialChar      = other.fInitialChar;
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *fInitialChars8   = *other.fInitialChars8;
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fNeedsAltInput    = other.fNeedsAltInput;
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Copy the pattern.  It's just values, nothing deep to copy.
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //  Copy the Unicode Sets.
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    Could be made more efficient if the sets were reference counted and shared,
117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //    but I doubt that pattern copying will be particularly common.
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    Note:  init() already added an empty element zero to fSets
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t  numSets = other.fSets->size();
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fSets8 = new Regex8BitSet[numSets];
122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (fSets8 == NULL) {
123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	return *this;
125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=1; i<numSets; i++) {
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(fDeferredStatus)) {
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return *this;
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (newSet == NULL) {
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fSets->addElement(newSet, fDeferredStatus);
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fSets8[i] = other.fSets8[i];
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    // Copy the named capture group hash map.
1411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t hashPos = UHASH_FIRST;
1421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
1431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if (U_FAILURE(fDeferredStatus)) {
1441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            break;
1451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
1461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
1471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        UnicodeString *key = new UnicodeString(*name);
1481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        int32_t val = hashEl->value.integer;
1491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if (key == NULL) {
1501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
1511b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        } else {
1521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
1531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
1541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//    init        Shared initialization for use by constructors.
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                Bring an uninitialized RegexPattern up to a default state.
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::init() {
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fFlags            = 0;
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fCompiledPat      = 0;
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fLiteralText.remove();
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fSets             = NULL;
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fSets8            = NULL;
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fDeferredStatus   = U_ZERO_ERROR;
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fMinMatchLen      = 0;
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fFrameSize        = 0;
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fDataSize         = 0;
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fGroupMap         = NULL;
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fStaticSets       = NULL;
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fStaticSets8      = NULL;
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fStartType        = START_NO_INFO;
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialStringIdx = 0;
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialStringLen = 0;
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialChars     = NULL;
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialChar      = 0;
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialChars8    = NULL;
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fNeedsAltInput    = FALSE;
1851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    fNamedCaptureMap  = NULL;
186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPattern          = NULL; // will be set later
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPatternString    = NULL; // may be set later
18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fCompiledPat      = new UVector64(fDeferredStatus);
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fGroupMap         = new UVector32(fDeferredStatus);
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fSets             = new UVector(fDeferredStatus);
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialChars     = new UnicodeSet;
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialChars8    = new Regex8BitSet;
1941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    fNamedCaptureMap  = uhash_open(uhash_hashUnicodeString,     // Key hash function
1951b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                   uhash_compareUnicodeString,  // Key comparator function
1961b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                   uhash_compareLong,           // Value comparator function
1971b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                   &fDeferredStatus);
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
2021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Slot zero of the vector of sets is reserved.  Fill it here.
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fSets->addElement((int32_t)0, fDeferredStatus);
2091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
2101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    // fNamedCaptureMap owns its key strings, type (UnicodeString *)
2111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//   zap            Delete everything owned by this RegexPattern.
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::zap() {
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fCompiledPat;
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fCompiledPat = NULL;
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int i;
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=1; i<fSets->size(); i++) {
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet *s;
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s = (UnicodeSet *)fSets->elementAt(i);
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (s != NULL) {
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete s;
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fSets;
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fSets = NULL;
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete[] fSets8;
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fSets8 = NULL;
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fGroupMap;
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fGroupMap = NULL;
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fInitialChars;
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialChars = NULL;
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fInitialChars8;
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fInitialChars8 = NULL;
24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fPattern != NULL) {
24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(fPattern);
24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fPattern = NULL;
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fPatternString != NULL) {
24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete fPatternString;
24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fPatternString = NULL;
24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    uhash_close(fNamedCaptureMap);
2501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    fNamedCaptureMap = NULL;
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Destructor
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::~RegexPattern() {
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    zap();
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Clone
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruRegexPattern  *RegexPattern::clone() const {
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern  *copy = new RegexPattern(*this);
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return copy;
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
//   operator ==   (comparison)    Consider two patterns to be == if the
//                                 pattern strings and the flags are the same.
//                                 Note that pattern strings with the same
//                                 characters can still be considered different.
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool   RegexPattern::operator ==(const RegexPattern &other) const {
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (this->fPatternString != NULL && other.fPatternString != NULL) {
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return *(this->fPatternString) == *(other.fPatternString);
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (this->fPattern == NULL) {
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (other.fPattern == NULL) {
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return TRUE;
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (other.fPattern != NULL) {
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(this->fPattern, 0);
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(other.fPattern, 0);
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return utext_equals(this->fPattern, other.fPattern);
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return FALSE;
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//   compile
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::compile(const UnicodeString &regex,
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      uint32_t             flags,
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UParseError          &pe,
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UErrorCode           &status)
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if ((flags & ~allFlags) != 0) {
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_INVALID_FLAG;
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
32483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if ((flags & UREGEX_CANON_EQ) != 0) {
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_UNIMPLEMENTED;
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *This = new RegexPattern;
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (This == NULL) {
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_MEMORY_ALLOCATION_ERROR;
33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(This->fDeferredStatus)) {
33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = This->fDeferredStatus;
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete This;
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    This->fFlags = flags;
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexCompile     compiler(This, status);
34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    compiler.compile(regex, pe, status);
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete This;
34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        This = NULL;
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return This;
35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   compile, UText mode
35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText                *regex,
35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      uint32_t             flags,
35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      UParseError          &pe,
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      UErrorCode           &status)
36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                              UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                              UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((flags & ~allFlags) != 0) {
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_REGEX_INVALID_FLAG;
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
37583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if ((flags & UREGEX_CANON_EQ) != 0) {
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_REGEX_UNIMPLEMENTED;
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern *This = new RegexPattern;
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (This == NULL) {
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(This->fDeferredStatus)) {
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = This->fDeferredStatus;
387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        delete This;
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    This->fFlags = flags;
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexCompile     compiler(This, status);
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    compiler.compile(regex, pe, status);
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        delete This;
397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        This = NULL;
398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return This;
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   compile with default flags.
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::compile(const UnicodeString &regex,
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UParseError         &pe,
409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                      UErrorCode          &err)
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return compile(regex, 0, pe, err);
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   compile with default flags, UText mode
41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2
41950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText               *regex,
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      UParseError         &pe,
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      UErrorCode          &err)
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return compile(regex, 0, pe, err);
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   compile with no UParseErr parameter.
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(const UnicodeString &regex,
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      uint32_t             flags,
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      UErrorCode          &err)
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError pe;
436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return compile(regex, flags, pe, err);
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   compile with no UParseErr parameter, UText mode
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText                *regex,
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      uint32_t             flags,
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      UErrorCode           &err)
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError pe;
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return compile(regex, flags, pe, err);
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   flags
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t RegexPattern::flags() const {
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return fFlags;
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   matcher(UnicodeString, err)
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher *RegexPattern::matcher(const UnicodeString &input,
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    UErrorCode          &status)  const {
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher    *retMatcher = matcher(status);
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (retMatcher != NULL) {
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retMatcher->fDeferredStatus = status;
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retMatcher->reset(input);
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return retMatcher;
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   matcher(status)
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher    *retMatcher = NULL;
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    retMatcher = new RegexMatcher(this);
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (retMatcher == NULL) {
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return retMatcher;
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   matches        Convenience function to test for a match, starting
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                  with a pattern string and a data string.
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const UnicodeString   &input,
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UParseError     &pe,
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode      &status) {
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {return FALSE;}
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool         retVal;
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexPattern *pat     = NULL;
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher *matcher = NULL;
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat     = RegexPattern::compile(regex, 0, pe, status);
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    matcher = pat->matcher(input, status);
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    retVal  = matcher->matches(status);
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete matcher;
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete pat;
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return retVal;
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   matches, UText mode
53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
53550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool U_EXPORT2 RegexPattern::matches(UText                *regex,
53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText           *input,
53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UParseError     &pe,
53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UErrorCode      &status) {
53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {return FALSE;}
54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
542b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UBool         retVal  = FALSE;
54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexPattern *pat     = NULL;
54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher *matcher = NULL;
54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pat     = RegexPattern::compile(regex, 0, pe, status);
547b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    matcher = pat->matcher(status);
548b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_SUCCESS(status)) {
549b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        matcher->reset(input);
550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        retVal  = matcher->matches(status);
551b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete matcher;
55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete pat;
55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return retVal;
55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
55750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
55950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   pattern
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString RegexPattern::pattern() const {
56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fPatternString != NULL) {
56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return *fPatternString;
57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if (fPattern == NULL) {
57150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return UnicodeString();
57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
57350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int64_t nativeLen = utext_nativeLength(fPattern);
57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
57650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString result;
577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ZERO_ERROR;
57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar *resultChars = result.getBuffer(len16);
58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result.releaseBuffer(len16);
582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return result;
58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   patternText
59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------
59527f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexPattern::patternText(UErrorCode      &status) const {
59627f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {return NULL;}
59727f654740f2a26ad62a5c155af9199af9e69b889claireho    status = U_ZERO_ERROR;
59827f654740f2a26ad62a5c155af9199af9e69b889claireho
59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fPattern != NULL) {
60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return fPattern;
60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        RegexStaticSets::initGlobals(&status);
60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return RegexStaticSets::gStaticSets->fEmptyText;
60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
60550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
60750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//--------------------------------------------------------------------------------
6091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//
6101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//  groupNumberFromName()
6111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//
6121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//--------------------------------------------------------------------------------
6131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertint32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
6141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(status)) {
6151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return 0;
6161b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
6171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
6181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    // No need to explicitly check for syntactically valid names.
6191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    // Invalid ones will never be in the map, and the lookup will fail.
6201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
6211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
6221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (number == 0) {
6231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
6241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
6251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    return number;
6261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert}
6271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
6281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertint32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
6291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(status)) {
6301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return 0;
6311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
6321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    UnicodeString name(groupName, nameLength, US_INV);
6331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    return groupNumberFromName(name, status);
6341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert}
6351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
63650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------
63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   split
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t  RegexPattern::split(const UnicodeString &input,
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString    dest[],
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t          destCapacity,
64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode      &status) const
64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{
64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
65050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
65150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    RegexMatcher  m(this);
65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t r = 0;
65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Check m's status to make sure all is ok.
65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_SUCCESS(m.fDeferredStatus)) {
65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    	r = m.split(input, dest, destCapacity, status);
65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
65750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return r;
65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
66050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   split, UText mode
66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
66350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t  RegexPattern::split(UText *input,
66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText           *dest[],
66550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t          destCapacity,
66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode      &status) const
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RegexMatcher  m(this);
673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t r = 0;
674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Check m's status to make sure all is ok.
675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_SUCCESS(m.fDeferredStatus)) {
676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	r = m.split(input, dest, destCapacity, status);
677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return r;
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   dump    Output the compiled form of the pattern.
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//           Debugging function only.
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------------
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid   RegexPattern::dumpOp(int32_t index) const {
689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    (void)index;  // Suppress warnings in non-debug build.
690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if defined(REGEX_DEBUG)
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char * const opNames[] = {URX_OPCODE_NAMES};
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t op          = fCompiledPat->elementAti(index);
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t val         = URX_VAL(op);
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t type        = URX_TYPE(op);
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t pinnedType  = type;
6968de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pinnedType = 0;
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
700fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (type) {
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_NOP:
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_DOTANY:
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_DOTANY_ALL:
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_FAIL:
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_CARET:
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_DOLLAR:
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_BACKSLASH_G:
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_BACKSLASH_X:
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_END:
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_DOLLAR_M:
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_CARET_M:
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Types with no operand field of interest.
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_RESERVED_OP:
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_START_CAPTURE:
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_END_CAPTURE:
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_STATE_SAVE:
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_JMP:
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_JMP_SAV:
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_JMP_SAV_X:
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_BACKSLASH_B:
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_BACKSLASH_BU:
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_BACKSLASH_D:
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_BACKSLASH_Z:
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_STRING_LEN:
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_CTR_INIT:
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_CTR_INIT_NG:
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_CTR_LOOP:
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_CTR_LOOP_NG:
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_RELOC_OPRND:
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_STO_SP:
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LD_SP:
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_BACKREF:
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_STO_INP_LOC:
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_JMPX:
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LA_START:
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LA_END:
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_BACKREF_I:
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LB_START:
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LB_CONT:
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LB_END:
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LBN_CONT:
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LBN_END:
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LOOP_C:
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LOOP_DOT_I:
7481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    case URX_BACKSLASH_H:
7491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    case URX_BACKSLASH_R:
7501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    case URX_BACKSLASH_V:
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // types with an integer operand field.
752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("%d", val);
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_ONECHAR:
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_ONECHAR_I:
7578de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        if (val < 0x20) {
7588de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert            printf("%#x", val);
7598de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        } else {
7608de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert            printf("'%s'", CStr(UnicodeString(val))());
7618de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        }
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_STRING:
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_STRING_I:
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t lengthOp       = fCompiledPat->elementAti(index+1);
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length = URX_VAL(lengthOp);
7708de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert            UnicodeString str(fLiteralText, val, length);
7718de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert            printf("%s", CStr(str)());
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_SETREF:
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_LOOP_SR_I:
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString s;
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            set->toPattern(s, TRUE);
7818de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert            printf("%s", CStr(s)());
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_STATIC_SETREF:
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case URX_STAT_SETREF_N:
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString s;
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (val & URX_NEG_SET) {
790fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                printf("NOT ");
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                val &= ~URX_NEG_SET;
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet *set = fStaticSets[val];
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            set->toPattern(s, TRUE);
7958de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert            printf("%s", CStr(s)());
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
801fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        printf("??????");
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
804fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    printf("\n");
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
806fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
809fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid RegexPattern::dumpPattern() const {
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(REGEX_DEBUG)
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int      index;
812fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
8138de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    UnicodeString patStr;
8148de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
8158de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        patStr.append(c);
816fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
8178de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    printf("Original Pattern:  \"%s\"\n", CStr(patStr)());
818fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    printf("   Min Match Length:  %d\n", fMinMatchLen);
819fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
820fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (fStartType == START_STRING) {
8218de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
8228de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        printf("   Initial match string: \"%s\"\n", CStr(initialString)());
823fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (fStartType == START_SET) {
8248de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        UnicodeString s;
8258de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        fInitialChars->toPattern(s, TRUE);
8268de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        printf("    Match First Chars: %s\n", CStr(s)());
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (fStartType == START_CHAR) {
8298de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        printf("    First char of Match: ");
8308de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        if (fInitialChar > 0x20) {
8318de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert                printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
833fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                printf("%#x\n", fInitialChar);
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    printf("Named Capture Groups:\n");
8381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (uhash_count(fNamedCaptureMap) == 0) {
8391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        printf("   None\n");
8401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    } else {
8411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        int32_t pos = UHASH_FIRST;
8421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        const UHashElement *el = NULL;
8431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
8441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            const UnicodeString *name = (const UnicodeString *)el->key.pointer;
8451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            int32_t number = el->value.integer;
8468de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert            printf("   %d\t%s\n", number, CStr(*name)());
8471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
8481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
8491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    printf("\nIndex   Binary     Type             Operand\n" \
851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius           "-------------------------------------------\n");
852fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for (index = 0; index<fCompiledPat->size(); index++) {
853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dumpOp(index);
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    printf("\n\n");
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
865