10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// file: repattrn.cpp 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*************************************************************************** 88de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert* Copyright (C) 2002-2016 International Business Machines Corporation 98de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert* and others. All rights reserved. 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*************************************************************************** 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uclean.h" 198de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert#include "cmemory.h" 208de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert#include "cstr.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#include "uhash.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h" 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uvectr64.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexcmp.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regeximp.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// RegexPattern Default Constructor 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::RegexPattern() { 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Init all of this instances data. 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru init(); 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Copy Constructor Note: This is a rather inefficient implementation, 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// but it probably doesn't matter. 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { 50c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(); 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *this = other; 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Assignment Operator 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern &RegexPattern::operator = (const RegexPattern &other) { 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (this == &other) { 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Source and destination are the same. Don't do anything. 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Clean out any previous contents of object being assigned to. 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru zap(); 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Give target object a default initialization 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru init(); 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy simple fields 741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fDeferredStatus = other.fDeferredStatus; 751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(fDeferredStatus)) { 771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return *this; 781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (other.fPatternString == NULL) { 8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; 821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); 8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = new UnicodeString(*(other.fPatternString)); 851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fPatternString == NULL) { 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 871b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus); 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(fDeferredStatus)) { 921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return *this; 931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFlags = other.fFlags; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLiteralText = other.fLiteralText; 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMinMatchLen = other.fMinMatchLen; 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFrameSize = other.fFrameSize; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDataSize = other.fDataSize; 100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fStaticSets = other.fStaticSets; 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets8 = other.fStaticSets8; 102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStartType = other.fStartType; 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringIdx = other.fInitialStringIdx; 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringLen = other.fInitialStringLen; 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *fInitialChars = *other.fInitialChars; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChar = other.fInitialChar; 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *fInitialChars8 = *other.fInitialChars8; 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNeedsAltInput = other.fNeedsAltInput; 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy the pattern. It's just values, nothing deep to copy. 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap->assign(*other.fGroupMap, fDeferredStatus); 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Copy the Unicode Sets. 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Could be made more efficient if the sets were reference counted and shared, 117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // but I doubt that pattern copying will be particularly common. 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: init() already added an empty element zero to fSets 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numSets = other.fSets->size(); 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = new Regex8BitSet[numSets]; 122c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fSets8 == NULL) { 123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=1; i<numSets; i++) { 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *newSet = new UnicodeSet(*sourceSet); 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (newSet == NULL) { 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(newSet, fDeferredStatus); 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8[i] = other.fSets8[i]; 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Copy the named capture group hash map. 1411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t hashPos = UHASH_FIRST; 1421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) { 1431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(fDeferredStatus)) { 1441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 1451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 1461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer; 1471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UnicodeString *key = new UnicodeString(*name); 1481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t val = hashEl->value.integer; 1491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (key == NULL) { 1501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 1511b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 1521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus); 1531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 1541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// init Shared initialization for use by constructors. 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Bring an uninitialized RegexPattern up to a default state. 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::init() { 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFlags = 0; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat = 0; 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLiteralText.remove(); 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = NULL; 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = NULL; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_ZERO_ERROR; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMinMatchLen = 0; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFrameSize = 0; 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDataSize = 0; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = NULL; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets = NULL; 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets8 = NULL; 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStartType = START_NO_INFO; 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringIdx = 0; 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringLen = 0; 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = NULL; 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChar = 0; 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = NULL; 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNeedsAltInput = FALSE; 1851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fNamedCaptureMap = NULL; 186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = NULL; // will be set later 18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; // may be set later 18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fCompiledPat = new UVector64(fDeferredStatus); 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = new UVector32(fDeferredStatus); 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = new UVector(fDeferredStatus); 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = new UnicodeSet; 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = new Regex8BitSet; 1941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function 1951b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_compareUnicodeString, // Key comparator function 1961b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_compareLong, // Value comparator function 1971b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert &fDeferredStatus); 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || 2021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) { 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Slot zero of the vector of sets is reserved. Fill it here. 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement((int32_t)0, fDeferredStatus); 2091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 2101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // fNamedCaptureMap owns its key strings, type (UnicodeString *) 2111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject); 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// zap Delete everything owned by this RegexPattern. 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::zap() { 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCompiledPat; 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat = NULL; 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=1; i<fSets->size(); i++) { 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *s; 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s = (UnicodeSet *)fSets->elementAt(i); 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s != NULL) { 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete s; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSets; 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = NULL; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete[] fSets8; 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = NULL; 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fGroupMap; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = NULL; 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fInitialChars; 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = NULL; 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fInitialChars8; 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = NULL; 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern != NULL) { 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(fPattern); 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = NULL; 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPatternString != NULL) { 24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fPatternString; 24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; 24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_close(fNamedCaptureMap); 2501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fNamedCaptureMap = NULL; 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Destructor 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::~RegexPattern() { 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru zap(); 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Clone 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruRegexPattern *RegexPattern::clone() const { 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *copy = new RegexPattern(*this); 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return copy; 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// operator == (comparison) Consider to patterns to be == if the 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// pattern strings and the flags are the same. 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Note that pattern strings with the same 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// characters can still be considered different. 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexPattern::operator ==(const RegexPattern &other) const { 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (this->fPatternString != NULL && other.fPatternString != NULL) { 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *(this->fPatternString) == *(other.fPatternString); 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (this->fPattern == NULL) { 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (other.fPattern == NULL) { 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (other.fPattern != NULL) { 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(this->fPattern, 0); 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(other.fPattern, 0); 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_equals(this->fPattern, other.fPattern); 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// compile 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::compile(const UnicodeString ®ex, 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags & ~allFlags) != 0) { 32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_INVALID_FLAG; 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 32483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if ((flags & UREGEX_CANON_EQ) != 0) { 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_UNIMPLEMENTED; 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *This = new RegexPattern; 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (This == NULL) { 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_MEMORY_ALLOCATION_ERROR; 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(This->fDeferredStatus)) { 33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = This->fDeferredStatus; 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete This; 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho This->fFlags = flags; 340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexCompile compiler(This, status); 34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compiler.compile(regex, pe, status); 343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete This; 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho This = NULL; 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return This; 35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile, UText mode 35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 35650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 35750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) 36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((flags & ~allFlags) != 0) { 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_REGEX_INVALID_FLAG; 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if ((flags & UREGEX_CANON_EQ) != 0) { 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_REGEX_UNIMPLEMENTED; 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *This = new RegexPattern; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (This == NULL) { 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(This->fDeferredStatus)) { 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = This->fDeferredStatus; 387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete This; 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru This->fFlags = flags; 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexCompile compiler(This, status); 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compiler.compile(regex, pe, status); 394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete This; 397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru This = NULL; 398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return This; 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 402c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// compile with default flags. 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::compile(const UnicodeString ®ex, 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 409c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode &err) 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return compile(regex, 0, pe, err); 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile with default flags, UText mode 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 41950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return compile(regex, 0, pe, err); 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// compile with no UParseErr parameter. 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(const UnicodeString ®ex, 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return compile(regex, flags, pe, err); 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile with no UParseErr parameter, UText mode 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return compile(regex, flags, pe, err); 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// flags 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t RegexPattern::flags() const { 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fFlags; 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matcher(UnicodeString, err) 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher *RegexPattern::matcher(const UnicodeString &input, 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const { 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *retMatcher = matcher(status); 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (retMatcher != NULL) { 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retMatcher->fDeferredStatus = status; 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retMatcher->reset(input); 47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retMatcher; 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matcher(status) 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher *RegexPattern::matcher(UErrorCode &status) const { 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *retMatcher = NULL; 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retMatcher = new RegexMatcher(this); 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (retMatcher == NULL) { 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retMatcher; 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matches Convenience function to test for a match, starting 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// with a pattern string and a data string. 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &input, 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) {return FALSE;} 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool retVal; 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = NULL; 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = NULL; 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = RegexPattern::compile(regex, 0, pe, status); 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher = pat->matcher(input, status); 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = matcher->matches(status); 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retVal; 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// matches, UText mode 53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 53550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool U_EXPORT2 RegexPattern::matches(UText *regex, 53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *input, 53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) {return FALSE;} 54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 542b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool retVal = FALSE; 54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = NULL; 54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *matcher = NULL; 54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat = RegexPattern::compile(regex, 0, pe, status); 547b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho matcher = pat->matcher(status); 548b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) { 549b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho matcher->reset(input); 550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retVal = matcher->matches(status); 551b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retVal; 55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 55750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 55950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// pattern 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString RegexPattern::pattern() const { 56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPatternString != NULL) { 56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *fPatternString; 57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (fPattern == NULL) { 57150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UnicodeString(); 57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 57350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nativeLen = utext_nativeLength(fPattern); 57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error 57650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result; 577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *resultChars = result.getBuffer(len16); 58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result.releaseBuffer(len16); 582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return result; 58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// patternText 59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 59527f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexPattern::patternText(UErrorCode &status) const { 59627f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) {return NULL;} 59727f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 59827f654740f2a26ad62a5c155af9199af9e69b889claireho 59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern != NULL) { 60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fPattern; 60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexStaticSets::initGlobals(&status); 60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return RegexStaticSets::gStaticSets->fEmptyText; 60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 60550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 60750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//-------------------------------------------------------------------------------- 6091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// 6101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// groupNumberFromName() 6111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// 6121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//-------------------------------------------------------------------------------- 6131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertint32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const { 6141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(status)) { 6151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return 0; 6161b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 6171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 6181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // No need to explicitly check for syntactically valid names. 6191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Invalid ones will never be in the map, and the lookup will fail. 6201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 6211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t number = uhash_geti(fNamedCaptureMap, &groupName); 6221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (number == 0) { 6231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 6241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 6251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return number; 6261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert} 6271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 6281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertint32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const { 6291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(status)) { 6301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return 0; 6311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 6321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UnicodeString name(groupName, nameLength, US_INV); 6331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return groupNumberFromName(name, status); 6341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert} 6351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 63650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// split 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RegexPattern::split(const UnicodeString &input, 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString dest[], 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) const 64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 65050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 65150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(this); 65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t r = 0; 65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check m's status to make sure all is ok. 65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(m.fDeferredStatus)) { 65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho r = m.split(input, dest, destCapacity, status); 65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 65750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return r; 65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 66050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// split, UText mode 66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 66350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexPattern::split(UText *input, 66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest[], 66550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) const 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(this); 673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t r = 0; 674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check m's status to make sure all is ok. 675c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_SUCCESS(m.fDeferredStatus)) { 676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = m.split(input, dest, destCapacity, status); 677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return r; 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// dump Output the compiled form of the pattern. 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Debugging function only. 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::dumpOp(int32_t index) const { 689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (void)index; // Suppress warnings in non-debug build. 690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if defined(REGEX_DEBUG) 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char * const opNames[] = {URX_OPCODE_NAMES}; 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t op = fCompiledPat->elementAti(index); 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t val = URX_VAL(op); 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t type = URX_TYPE(op); 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t pinnedType = type; 6968de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) { 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pinnedType = 0; 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 700fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%4d %08x %-15s ", index, op, opNames[pinnedType]); 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (type) { 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_NOP: 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY: 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY_ALL: 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_FAIL: 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CARET: 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOLLAR: 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_G: 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_X: 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END: 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOLLAR_M: 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CARET_M: 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Types with no operand field of interest. 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_RESERVED_OP: 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_START_CAPTURE: 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END_CAPTURE: 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATE_SAVE: 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP: 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV: 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV_X: 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_B: 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_BU: 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_D: 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_Z: 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING_LEN: 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT: 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT_NG: 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP: 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP_NG: 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_RELOC_OPRND: 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_SP: 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LD_SP: 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF: 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_INP_LOC: 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMPX: 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_START: 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_END: 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF_I: 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_START: 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_CONT: 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_END: 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_CONT: 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_END: 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_C: 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_DOT_I: 7481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_H: 7491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_R: 7501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_V: 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // types with an integer operand field. 752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%d", val); 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 754c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR: 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR_I: 7578de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert if (val < 0x20) { 7588de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf("%#x", val); 7598de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert } else { 7608de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf("'%s'", CStr(UnicodeString(val))()); 7618de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert } 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING: 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING_I: 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lengthOp = fCompiledPat->elementAti(index+1); 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length = URX_VAL(lengthOp); 7708de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert UnicodeString str(fLiteralText, val, length); 7718de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf("%s", CStr(str)()); 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_SETREF: 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_SR_I: 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(s, TRUE); 7818de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf("%s", CStr(s)()); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATIC_SETREF: 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STAT_SETREF_N: 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s; 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (val & URX_NEG_SET) { 790fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("NOT "); 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru val &= ~URX_NEG_SET; 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = fStaticSets[val]; 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(s, TRUE); 7958de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf("%s", CStr(s)()); 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 801fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("??????"); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 804fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n"); 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 806fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 809fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid RegexPattern::dumpPattern() const { 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(REGEX_DEBUG) 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int index; 812fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 8138de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert UnicodeString patStr; 8148de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) { 8158de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert patStr.append(c); 816fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 8178de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf("Original Pattern: \"%s\"\n", CStr(patStr)()); 818fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Min Match Length: %d\n", fMinMatchLen); 819fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); 820fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (fStartType == START_STRING) { 8218de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen); 8228de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf(" Initial match string: \"%s\"\n", CStr(initialString)()); 823fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (fStartType == START_SET) { 8248de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert UnicodeString s; 8258de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert fInitialChars->toPattern(s, TRUE); 8268de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf(" Match First Chars: %s\n", CStr(s)()); 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (fStartType == START_CHAR) { 8298de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf(" First char of Match: "); 8308de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert if (fInitialChar > 0x20) { 8318de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf("'%s'\n", CStr(UnicodeString(fInitialChar))()); 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 833fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%#x\n", fInitialChar); 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert printf("Named Capture Groups:\n"); 8381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (uhash_count(fNamedCaptureMap) == 0) { 8391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert printf(" None\n"); 8401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 8411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t pos = UHASH_FIRST; 8421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const UHashElement *el = NULL; 8431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) { 8441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const UnicodeString *name = (const UnicodeString *)el->key.pointer; 8451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t number = el->value.integer; 8468de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert printf(" %d\t%s\n", number, CStr(*name)()); 8471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 8481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 8491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\nIndex Binary Type Operand\n" \ 851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "-------------------------------------------\n"); 852fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for (index = 0; index<fCompiledPat->size(); index++) { 853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dumpOp(index); 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n\n"); 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 865