1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// file: repattrn.cpp 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*************************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 2002-2010 International Business Machines Corporation * 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* and others. All rights reserved. * 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*************************************************************************** 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_REGULAR_EXPRESSIONS 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/regex.h" 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uclean.h" 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uassert.h" 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvector.h" 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvectr32.h" 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uvectr64.h" 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "regexcmp.h" 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "regeximp.h" 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "regexst.h" 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// RegexPattern Default Constructor 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::RegexPattern() { 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_init(&status); 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Init all of this instances data. 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Copy Constructor Note: This is a rather inefficient implementation, 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// but it probably doesn't matter. 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *this = other; 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Assignment Operator 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern &RegexPattern::operator = (const RegexPattern &other) { 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (this == &other) { 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Source and destination are the same. Don't do anything. 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *this; 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Clean out any previous contents of object being assigned to. 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) zap(); 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Give target object a default initialization 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Copy simple fields 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( other.fPatternString == NULL ) { 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPatternString = NULL; 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPatternString = new UnicodeString(*(other.fPatternString)); 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status); 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *this; 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fFlags = other.fFlags; 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLiteralText = other.fLiteralText; 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDeferredStatus = other.fDeferredStatus; 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMinMatchLen = other.fMinMatchLen; 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fFrameSize = other.fFrameSize; 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDataSize = other.fDataSize; 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMaxCaptureDigits = other.fMaxCaptureDigits; 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fStaticSets = other.fStaticSets; 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fStaticSets8 = other.fStaticSets8; 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fStartType = other.fStartType; 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialStringIdx = other.fInitialStringIdx; 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialStringLen = other.fInitialStringLen; 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *fInitialChars = *other.fInitialChars; 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChar = other.fInitialChar; 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *fInitialChars8 = *other.fInitialChars8; 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNeedsAltInput = other.fNeedsAltInput; 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Copy the pattern. It's just values, nothing deep to copy. 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fGroupMap->assign(*other.fGroupMap, fDeferredStatus); 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Copy the Unicode Sets. 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Could be made more efficient if the sets were reference counted and shared, 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // but I doubt that pattern copying will be particularly common. 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Note: init() already added an empty element zero to fSets 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i; 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t numSets = other.fSets->size(); 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets8 = new Regex8BitSet[numSets]; 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSets8 == NULL) { 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *this; 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=1; i<numSets; i++) { 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(fDeferredStatus)) { 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *this; 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *newSet = new UnicodeSet(*sourceSet); 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (newSet == NULL) { 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement(newSet, fDeferredStatus); 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets8[i] = other.fSets8[i]; 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *this; 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// init Shared initialization for use by constructors. 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Bring an uninitialized RegexPattern up to a default state. 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RegexPattern::init() { 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fFlags = 0; 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCompiledPat = 0; 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLiteralText.remove(); 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets = NULL; 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets8 = NULL; 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDeferredStatus = U_ZERO_ERROR; 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMinMatchLen = 0; 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fFrameSize = 0; 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDataSize = 0; 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fGroupMap = NULL; 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMaxCaptureDigits = 1; 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fStaticSets = NULL; 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fStaticSets8 = NULL; 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fStartType = START_NO_INFO; 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialStringIdx = 0; 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialStringLen = 0; 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChars = NULL; 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChar = 0; 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChars8 = NULL; 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNeedsAltInput = FALSE; 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPattern = NULL; // will be set later 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPatternString = NULL; // may be set later 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCompiledPat = new UVector64(fDeferredStatus); 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fGroupMap = new UVector32(fDeferredStatus); 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets = new UVector(fDeferredStatus); 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChars = new UnicodeSet; 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChars8 = new Regex8BitSet; 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(fDeferredStatus)) { 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChars == NULL || fInitialChars8 == NULL) { 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Slot zero of the vector of sets is reserved. Fill it here. 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets->addElement((int32_t)0, fDeferredStatus); 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// zap Delete everything owned by this RegexPattern. 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RegexPattern::zap() { 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fCompiledPat; 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCompiledPat = NULL; 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=1; i<fSets->size(); i++) { 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *s; 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s = (UnicodeSet *)fSets->elementAt(i); 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (s != NULL) { 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete s; 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fSets; 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets = NULL; 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete[] fSets8; 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSets8 = NULL; 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fGroupMap; 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fGroupMap = NULL; 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fInitialChars; 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChars = NULL; 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fInitialChars8; 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fInitialChars8 = NULL; 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPattern != NULL) { 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_close(fPattern); 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPattern = NULL; 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPatternString != NULL) { 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fPatternString; 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fPatternString = NULL; 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Destructor 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::~RegexPattern() { 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) zap(); 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Clone 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern *RegexPattern::clone() const { 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexPattern *copy = new RegexPattern(*this); 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return copy; 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// operator == (comparison) Consider to patterns to be == if the 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// pattern strings and the flags are the same. 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Note that pattern strings with the same 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// characters can still be considered different. 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------- 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool RegexPattern::operator ==(const RegexPattern &other) const { 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (this->fPatternString != NULL && other.fPatternString != NULL) { 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *(this->fPatternString) == *(other.fPatternString); 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (this->fPattern == NULL) { 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (other.fPattern == NULL) { 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (other.fPattern != NULL) { 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(this->fPattern, 0); 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTEXT_SETNATIVEINDEX(other.fPattern, 0); 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return utext_equals(this->fPattern, other.fPattern); 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// compile 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern * U_EXPORT2 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::compile(const UnicodeString ®ex, 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t flags, 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError &pe, 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((flags & ~allFlags) != 0) { 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_REGEX_INVALID_FLAG; 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) { 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_REGEX_UNIMPLEMENTED; 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexPattern *This = new RegexPattern; 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (This == NULL) { 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(This->fDeferredStatus)) { 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = This->fDeferredStatus; 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete This; 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) This->fFlags = flags; 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexCompile compiler(This, status); 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compiler.compile(regex, pe, status); 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete This; 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) This = NULL; 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return This; 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// compile, UText mode 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern * U_EXPORT2 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::compile(UText *regex, 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t flags, 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError &pe, 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((flags & ~allFlags) != 0) { 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_REGEX_INVALID_FLAG; 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) { 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_REGEX_UNIMPLEMENTED; 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexPattern *This = new RegexPattern; 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (This == NULL) { 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(This->fDeferredStatus)) { 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = This->fDeferredStatus; 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete This; 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) This->fFlags = flags; 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexCompile compiler(This, status); 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compiler.compile(regex, pe, status); 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete This; 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) This = NULL; 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return This; 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// compile with default flags. 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern * U_EXPORT2 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::compile(const UnicodeString ®ex, 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError &pe, 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &err) 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return compile(regex, 0, pe, err); 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// compile with default flags, UText mode 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern * U_EXPORT2 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::compile(UText *regex, 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError &pe, 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &err) 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return compile(regex, 0, pe, err); 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// compile with no UParseErr parameter. 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern * U_EXPORT2 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::compile(const UnicodeString ®ex, 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t flags, 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &err) 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError pe; 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return compile(regex, flags, pe, err); 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// compile with no UParseErr parameter, UText mode 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern * U_EXPORT2 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPattern::compile(UText *regex, 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t flags, 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &err) 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError pe; 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return compile(regex, flags, pe, err); 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// flags 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uint32_t RegexPattern::flags() const { 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fFlags; 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// matcher(UnicodeString, err) 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexMatcher *RegexPattern::matcher(const UnicodeString &input, 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) const { 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher *retMatcher = matcher(status); 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (retMatcher != NULL) { 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retMatcher->fDeferredStatus = status; 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retMatcher->reset(input); 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return retMatcher; 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// matcher, UText mode 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexMatcher *RegexPattern::matcher(UText *input, 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) PatternIsUTextFlag /*flag*/, 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) const { 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher *retMatcher = matcher(status); 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (retMatcher != NULL) { 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retMatcher->fDeferredStatus = status; 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retMatcher->reset(input); 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return retMatcher; 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexMatcher *RegexPattern::matcher(const UChar * /*input*/, 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) const 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* This should never get called. The API with UnicodeString should be called instead. */ 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(status)) { 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_UNSUPPORTED_ERROR; 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// matcher(status) 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexMatcher *RegexPattern::matcher(UErrorCode &status) const { 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher *retMatcher = NULL; 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(fDeferredStatus)) { 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = fDeferredStatus; 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retMatcher = new RegexMatcher(this); 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (retMatcher == NULL) { 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return retMatcher; 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// matches Convenience function to test for a match, starting 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// with a pattern string and a data string. 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UnicodeString &input, 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError &pe, 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) { 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) {return FALSE;} 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool retVal; 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexPattern *pat = NULL; 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher *matcher = NULL; 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pat = RegexPattern::compile(regex, 0, pe, status); 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matcher = pat->matcher(input, status); 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retVal = matcher->matches(status); 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete matcher; 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete pat; 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return retVal; 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// matches, UText mode 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool U_EXPORT2 RegexPattern::matches(UText *regex, 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UText *input, 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UParseError &pe, 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) { 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) {return FALSE;} 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool retVal; 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexPattern *pat = NULL; 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher *matcher = NULL; 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pat = RegexPattern::compile(regex, 0, pe, status); 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matcher = pat->matcher(input, PATTERN_IS_UTEXT, status); 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) retVal = matcher->matches(status); 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete matcher; 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete pat; 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return retVal; 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// pattern 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString RegexPattern::pattern() const { 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPatternString != NULL) { 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *fPatternString; 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (fPattern == NULL) { 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return UnicodeString(); 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int64_t nativeLen = utext_nativeLength(fPattern); 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString result; 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *resultChars = result.getBuffer(len16); 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result.releaseBuffer(len16); 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// patternText 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UText *RegexPattern::patternText(UErrorCode &status) const { 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) {return NULL;} 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fPattern != NULL) { 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fPattern; 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexStaticSets::initGlobals(&status); 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return RegexStaticSets::gStaticSets->fEmptyText; 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// split 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RegexPattern::split(const UnicodeString &input, 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString dest[], 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t destCapacity, 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) const 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher m(this); 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t r = 0; 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check m's status to make sure all is ok. 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(m.fDeferredStatus)) { 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r = m.split(input, dest, destCapacity, status); 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return r; 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// split, UText mode 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t RegexPattern::split(UText *input, 625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UText *dest[], 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t destCapacity, 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) const 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) RegexMatcher m(this); 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t r = 0; 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check m's status to make sure all is ok. 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_SUCCESS(m.fDeferredStatus)) { 637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r = m.split(input, dest, destCapacity, status); 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return r; 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// dump Output the compiled form of the pattern. 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Debugging function only. 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------- 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if defined(REGEX_DEBUG) 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void RegexPattern::dumpOp(int32_t index) const { 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char * const opNames[] = {URX_OPCODE_NAMES}; 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t op = fCompiledPat->elementAti(index); 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t val = URX_VAL(op); 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t type = URX_TYPE(op); 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t pinnedType = type; 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) { 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pinnedType = 0; 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType])); 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch (type) { 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_NOP: 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_DOTANY: 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_DOTANY_ALL: 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_FAIL: 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_CARET: 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_DOLLAR: 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_BACKSLASH_G: 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_BACKSLASH_X: 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_END: 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_DOLLAR_M: 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_CARET_M: 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Types with no operand field of interest. 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_RESERVED_OP: 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_START_CAPTURE: 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_END_CAPTURE: 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_STATE_SAVE: 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_JMP: 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_JMP_SAV: 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_JMP_SAV_X: 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_BACKSLASH_B: 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_BACKSLASH_BU: 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_BACKSLASH_D: 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_BACKSLASH_Z: 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_STRING_LEN: 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_CTR_INIT: 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_CTR_INIT_NG: 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_CTR_LOOP: 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_CTR_LOOP_NG: 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_RELOC_OPRND: 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_STO_SP: 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LD_SP: 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_BACKREF: 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_STO_INP_LOC: 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_JMPX: 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LA_START: 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LA_END: 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_BACKREF_I: 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LB_START: 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LB_CONT: 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LB_END: 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LBN_CONT: 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LBN_END: 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LOOP_C: 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LOOP_DOT_I: 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // types with an integer operand field. 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%d", val)); 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_ONECHAR: 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_ONECHAR_I: 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?')); 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_STRING: 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_STRING_I: 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t lengthOp = fCompiledPat->elementAti(index+1); 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t length = URX_VAL(lengthOp); 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i; 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=val; i<val+length; i++) { 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar c = fLiteralText[i]; 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c < 32 || c >= 256) {c = '.';} 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_SETREF: 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_LOOP_SR_I: 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString s; 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) set->toPattern(s, TRUE); 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (int32_t i=0; i<s.length(); i++) { 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i))); 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_STATIC_SETREF: 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case URX_STAT_SETREF_N: 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString s; 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (val & URX_NEG_SET) { 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("NOT ")); 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) val &= ~URX_NEG_SET; 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *set = fStaticSets[val]; 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) set->toPattern(s, TRUE); 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (int32_t i=0; i<s.length(); i++) { 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i))); 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("??????")); 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("\n")); 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if defined(REGEX_DEBUG) 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)RegexPatternDump(const RegexPattern *This) { 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int index; 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: ")); 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c = utext_next32From(This->fPattern, 0); 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (c != U_SENTINEL) { 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c<32 || c>256) { 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = '.'; 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c = UTEXT_NEXT32(This->fPattern); 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("\n")); 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen)); 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType))); 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (This->fStartType == START_STRING) { 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \"")); 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) { 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates. 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("\"\n")); 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (This->fStartType == START_SET) { 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t numSetChars = This->fInitialChars->size(); 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (numSetChars > 20) { 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) numSetChars = 20; 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : ")); 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<numSetChars; i++) { 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c = This->fInitialChars->charAt(i); 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (0x20<c && c <0x7e) { 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%c ", c)); 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%#x ", c)); 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (numSetChars < This->fInitialChars->size()) { 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF((" ...")); 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("\n")); 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (This->fStartType == START_CHAR) { 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF((" First char of Match : ")); 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) { 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar)); 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar)); 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \ 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "-------------------------------------------\n")); 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (index = 0; index<This->fCompiledPat->size(); index++) { 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) This->dumpOp(index); 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) REGEX_DUMP_DEBUG_PRINTF(("\n\n")); 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 840