1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// file: repattrn.cpp 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*************************************************************************** 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2002-2013 International Business Machines Corporation * 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and others. All rights reserved. * 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*************************************************************************** 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uclean.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h" 2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uvectr64.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexcmp.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regeximp.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// RegexPattern Default Constructor 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::RegexPattern() { 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Init all of this instances data. 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru init(); 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Copy Constructor Note: This is a rather inefficient implementation, 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// but it probably doesn't matter. 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { 45c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(); 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *this = other; 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Assignment Operator 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern &RegexPattern::operator = (const RegexPattern &other) { 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (this == &other) { 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Source and destination are the same. Don't do anything. 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Clean out any previous contents of object being assigned to. 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru zap(); 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Give target object a default initialization 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru init(); 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy simple fields 6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ( other.fPatternString == NULL ) { 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; 7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = new UnicodeString(*(other.fPatternString)); 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status); 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFlags = other.fFlags; 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLiteralText = other.fLiteralText; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = other.fDeferredStatus; 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMinMatchLen = other.fMinMatchLen; 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFrameSize = other.fFrameSize; 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDataSize = other.fDataSize; 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMaxCaptureDigits = other.fMaxCaptureDigits; 88c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fStaticSets = other.fStaticSets; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets8 = other.fStaticSets8; 90c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStartType = other.fStartType; 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringIdx = other.fInitialStringIdx; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringLen = other.fInitialStringLen; 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *fInitialChars = *other.fInitialChars; 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChar = other.fInitialChar; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *fInitialChars8 = *other.fInitialChars8; 9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNeedsAltInput = other.fNeedsAltInput; 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy the pattern. It's just values, nothing deep to copy. 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap->assign(*other.fGroupMap, fDeferredStatus); 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Copy the Unicode Sets. 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Could be made more efficient if the sets were reference counted and shared, 105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // but I doubt that pattern copying will be particularly common. 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: init() already added an empty element zero to fSets 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numSets = other.fSets->size(); 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = new Regex8BitSet[numSets]; 110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fSets8 == NULL) { 111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=1; i<numSets; i++) { 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *newSet = new UnicodeSet(*sourceSet); 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (newSet == NULL) { 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(newSet, fDeferredStatus); 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8[i] = other.fSets8[i]; 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// init Shared initialization for use by constructors. 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Bring an uninitialized RegexPattern up to a default state. 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::init() { 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFlags = 0; 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat = 0; 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLiteralText.remove(); 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = NULL; 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = NULL; 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_ZERO_ERROR; 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMinMatchLen = 0; 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFrameSize = 0; 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDataSize = 0; 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = NULL; 149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMaxCaptureDigits = 1; 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets = NULL; 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets8 = NULL; 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStartType = START_NO_INFO; 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringIdx = 0; 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringLen = 0; 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = NULL; 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChar = 0; 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = NULL; 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNeedsAltInput = FALSE; 159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = NULL; // will be set later 16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; // may be set later 16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fCompiledPat = new UVector64(fDeferredStatus); 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = new UVector32(fDeferredStatus); 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = new UVector(fDeferredStatus); 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = new UnicodeSet; 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = new Regex8BitSet; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars == NULL || fInitialChars8 == NULL) { 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Slot zero of the vector of sets is reserved. Fill it here. 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement((int32_t)0, fDeferredStatus); 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// zap Delete everything owned by this RegexPattern. 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::zap() { 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCompiledPat; 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat = NULL; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=1; i<fSets->size(); i++) { 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *s; 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s = (UnicodeSet *)fSets->elementAt(i); 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s != NULL) { 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete s; 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSets; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = NULL; 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete[] fSets8; 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = NULL; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fGroupMap; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = NULL; 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fInitialChars; 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = NULL; 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fInitialChars8; 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = NULL; 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern != NULL) { 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(fPattern); 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = NULL; 21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPatternString != NULL) { 21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fPatternString; 21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; 21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Destructor 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::~RegexPattern() { 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru zap(); 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Clone 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruRegexPattern *RegexPattern::clone() const { 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *copy = new RegexPattern(*this); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return copy; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// operator == (comparison) Consider to patterns to be == if the 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// pattern strings and the flags are the same. 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Note that pattern strings with the same 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// characters can still be considered different. 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexPattern::operator ==(const RegexPattern &other) const { 24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (this->fPatternString != NULL && other.fPatternString != NULL) { 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *(this->fPatternString) == *(other.fPatternString); 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (this->fPattern == NULL) { 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (other.fPattern == NULL) { 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (other.fPattern != NULL) { 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(this->fPattern, 0); 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(other.fPattern, 0); 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_equals(this->fPattern, other.fPattern); 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// compile 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::compile(const UnicodeString ®ex, 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags & ~allFlags) != 0) { 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_INVALID_FLAG; 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 28883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if ((flags & UREGEX_CANON_EQ) != 0) { 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_UNIMPLEMENTED; 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *This = new RegexPattern; 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (This == NULL) { 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_MEMORY_ALLOCATION_ERROR; 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(This->fDeferredStatus)) { 29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = This->fDeferredStatus; 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete This; 30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho This->fFlags = flags; 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexCompile compiler(This, status); 30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compiler.compile(regex, pe, status); 307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete This; 31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho This = NULL; 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return This; 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile, UText mode 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 32050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((flags & ~allFlags) != 0) { 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_REGEX_INVALID_FLAG; 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if ((flags & UREGEX_CANON_EQ) != 0) { 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_REGEX_UNIMPLEMENTED; 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *This = new RegexPattern; 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (This == NULL) { 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(This->fDeferredStatus)) { 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = This->fDeferredStatus; 351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete This; 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru This->fFlags = flags; 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexCompile compiler(This, status); 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compiler.compile(regex, pe, status); 358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 359c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 360c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete This; 361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru This = NULL; 362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return This; 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// compile with default flags. 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::compile(const UnicodeString ®ex, 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode &err) 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return compile(regex, 0, pe, err); 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile with default flags, UText mode 38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 38250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return compile(regex, 0, pe, err); 38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// compile with no UParseErr parameter. 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 39550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(const UnicodeString ®ex, 39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return compile(regex, flags, pe, err); 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile with no UParseErr parameter, UText mode 40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 40750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 40850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return compile(regex, flags, pe, err); 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// flags 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t RegexPattern::flags() const { 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fFlags; 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matcher(UnicodeString, err) 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher *RegexPattern::matcher(const UnicodeString &input, 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const { 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *retMatcher = matcher(status); 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (retMatcher != NULL) { 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retMatcher->fDeferredStatus = status; 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retMatcher->reset(input); 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retMatcher; 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matcher(status) 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher *RegexPattern::matcher(UErrorCode &status) const { 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *retMatcher = NULL; 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retMatcher = new RegexMatcher(this); 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (retMatcher == NULL) { 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retMatcher; 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matches Convenience function to test for a match, starting 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// with a pattern string and a data string. 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &input, 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) {return FALSE;} 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool retVal; 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = NULL; 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = NULL; 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = RegexPattern::compile(regex, 0, pe, status); 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher = pat->matcher(input, status); 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = matcher->matches(status); 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retVal; 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// matches, UText mode 49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 49950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool U_EXPORT2 RegexPattern::matches(UText *regex, 50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *input, 50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) {return FALSE;} 50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 506b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool retVal = FALSE; 50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = NULL; 50850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *matcher = NULL; 50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat = RegexPattern::compile(regex, 0, pe, status); 511b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho matcher = pat->matcher(status); 512b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) { 513b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho matcher->reset(input); 514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retVal = matcher->matches(status); 515b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retVal; 52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// pattern 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString RegexPattern::pattern() const { 53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPatternString != NULL) { 53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *fPatternString; 53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (fPattern == NULL) { 53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UnicodeString(); 53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nativeLen = utext_nativeLength(fPattern); 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error 54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result; 541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 54250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *resultChars = result.getBuffer(len16); 54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result.releaseBuffer(len16); 546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return result; 54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// patternText 55750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 55927f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexPattern::patternText(UErrorCode &status) const { 56027f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) {return NULL;} 56127f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 56227f654740f2a26ad62a5c155af9199af9e69b889claireho 56350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern != NULL) { 56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fPattern; 56550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexStaticSets::initGlobals(&status); 56750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return RegexStaticSets::gStaticSets->fEmptyText; 56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 57150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 57350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// split 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RegexPattern::split(const UnicodeString &input, 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString dest[], 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) const 58250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 58550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 58650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 58750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(this); 58850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t r = 0; 58950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check m's status to make sure all is ok. 59050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(m.fDeferredStatus)) { 59150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho r = m.split(input, dest, destCapacity, status); 59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return r; 59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 59550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// split, UText mode 59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 59950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexPattern::split(UText *input, 60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest[], 60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) const 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(this); 609c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t r = 0; 610c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check m's status to make sure all is ok. 611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_SUCCESS(m.fDeferredStatus)) { 612c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = m.split(input, dest, destCapacity, status); 613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return r; 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// dump Output the compiled form of the pattern. 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Debugging function only. 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::dumpOp(int32_t index) const { 626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (void)index; // Suppress warnings in non-debug build. 627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if defined(REGEX_DEBUG) 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char * const opNames[] = {URX_OPCODE_NAMES}; 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t op = fCompiledPat->elementAti(index); 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t val = URX_VAL(op); 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t type = URX_TYPE(op); 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t pinnedType = type; 633c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) { 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pinnedType = 0; 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%4d %08x %-15s ", index, op, opNames[pinnedType]); 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (type) { 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_NOP: 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY: 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY_ALL: 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_FAIL: 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CARET: 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOLLAR: 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_G: 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_X: 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END: 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOLLAR_M: 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CARET_M: 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Types with no operand field of interest. 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_RESERVED_OP: 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_START_CAPTURE: 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END_CAPTURE: 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATE_SAVE: 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP: 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV: 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV_X: 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_B: 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_BU: 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_D: 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_Z: 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING_LEN: 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT: 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT_NG: 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP: 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP_NG: 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_RELOC_OPRND: 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_SP: 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LD_SP: 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF: 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_INP_LOC: 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMPX: 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_START: 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_END: 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF_I: 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_START: 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_CONT: 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_END: 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_CONT: 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_END: 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_C: 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_DOT_I: 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // types with an integer operand field. 686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%d", val); 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR: 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR_I: 691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", val<256?val:'?'); 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING: 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING_I: 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lengthOp = fCompiledPat->elementAti(index+1); 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length = URX_VAL(lengthOp); 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=val; i<val+length; i++) { 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = fLiteralText[i]; 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < 32 || c >= 256) {c = '.';} 704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", c); 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_SETREF: 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_SR_I: 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s; 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(s, TRUE); 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<s.length(); i++) { 716fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", s.charAt(i)); 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATIC_SETREF: 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STAT_SETREF_N: 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s; 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (val & URX_NEG_SET) { 726fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("NOT "); 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru val &= ~URX_NEG_SET; 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = fStaticSets[val]; 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(s, TRUE); 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<s.length(); i++) { 732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", s.charAt(i)); 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 737c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("??????"); 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 742fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n"); 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 747fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid RegexPattern::dumpPattern() const { 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(REGEX_DEBUG) 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int index; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("Original Pattern: "); 753fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = utext_next32From(fPattern, 0); 75450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 75550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<32 || c>256) { 75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = '.'; 75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", c); 759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius c = UTEXT_NEXT32(fPattern); 761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n"); 763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Min Match Length: %d\n", fMinMatchLen); 764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); 765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (fStartType == START_STRING) { 766fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Initial match string: \""); 767fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) { 768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", fLiteralText[i]); // TODO: non-printables, surrogates. 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 770fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\"\n"); 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (fStartType == START_SET) { 773fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t numSetChars = fInitialChars->size(); 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (numSetChars > 20) { 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru numSetChars = 20; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 777fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Match First Chars : "); 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<numSetChars; i++) { 779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = fInitialChars->charAt(i); 780c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (0x20<c && c <0x7e) { 781fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c ", c); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 783fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%#x ", c); 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 786fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (numSetChars < fInitialChars->size()) { 787fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" ..."); 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 789fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n"); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 791fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (fStartType == START_CHAR) { 792fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" First char of Match : "); 793fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (0x20 < fInitialChar && fInitialChar<0x7e) { 794fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c\n", fInitialChar); 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 796fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%#x\n", fInitialChar); 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 800fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\nIndex Binary Type Operand\n" \ 801fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "-------------------------------------------\n"); 802fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for (index = 0; index<fCompiledPat->size(); index++) { 803fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dumpOp(index); 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 805fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n\n"); 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 807fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 815