1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 2c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// file: repattrn.cpp 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*************************************************************************** 61b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2002-2015 International Business Machines Corporation * 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and others. All rights reserved. * 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*************************************************************************** 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uclean.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert#include "uhash.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h" 2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uvectr64.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexcmp.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regeximp.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// RegexPattern Default Constructor 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::RegexPattern() { 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Init all of this instances data. 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru init(); 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Copy Constructor Note: This is a rather inefficient implementation, 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// but it probably doesn't matter. 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { 46c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(); 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *this = other; 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Assignment Operator 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern &RegexPattern::operator = (const RegexPattern &other) { 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (this == &other) { 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Source and destination are the same. Don't do anything. 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Clean out any previous contents of object being assigned to. 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru zap(); 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Give target object a default initialization 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru init(); 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy simple fields 701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fDeferredStatus = other.fDeferredStatus; 711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(fDeferredStatus)) { 731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return *this; 741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (other.fPatternString == NULL) { 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; 781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = new UnicodeString(*(other.fPatternString)); 811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fPatternString == NULL) { 8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 831b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus); 8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 871b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(fDeferredStatus)) { 881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return *this; 891b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFlags = other.fFlags; 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLiteralText = other.fLiteralText; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMinMatchLen = other.fMinMatchLen; 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFrameSize = other.fFrameSize; 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDataSize = other.fDataSize; 96c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fStaticSets = other.fStaticSets; 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets8 = other.fStaticSets8; 98c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStartType = other.fStartType; 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringIdx = other.fInitialStringIdx; 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringLen = other.fInitialStringLen; 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *fInitialChars = *other.fInitialChars; 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChar = other.fInitialChar; 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *fInitialChars8 = *other.fInitialChars8; 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNeedsAltInput = other.fNeedsAltInput; 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy the pattern. It's just values, nothing deep to copy. 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap->assign(*other.fGroupMap, fDeferredStatus); 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Copy the Unicode Sets. 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Could be made more efficient if the sets were reference counted and shared, 113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // but I doubt that pattern copying will be particularly common. 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: init() already added an empty element zero to fSets 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numSets = other.fSets->size(); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = new Regex8BitSet[numSets]; 118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fSets8 == NULL) { 119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 121c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=1; i<numSets; i++) { 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *newSet = new UnicodeSet(*sourceSet); 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (newSet == NULL) { 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement(newSet, fDeferredStatus); 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8[i] = other.fSets8[i]; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Copy the named capture group hash map. 1371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t hashPos = UHASH_FIRST; 1381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) { 1391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(fDeferredStatus)) { 1401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 1411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 1421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer; 1431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UnicodeString *key = new UnicodeString(*name); 1441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t val = hashEl->value.integer; 1451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (key == NULL) { 1461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 1471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 1481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus); 1491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 1501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// init Shared initialization for use by constructors. 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Bring an uninitialized RegexPattern up to a default state. 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::init() { 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFlags = 0; 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat = 0; 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLiteralText.remove(); 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = NULL; 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = NULL; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_ZERO_ERROR; 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMinMatchLen = 0; 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFrameSize = 0; 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDataSize = 0; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = NULL; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets = NULL; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStaticSets8 = NULL; 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStartType = START_NO_INFO; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringIdx = 0; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialStringLen = 0; 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = NULL; 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChar = 0; 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = NULL; 18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNeedsAltInput = FALSE; 1811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fNamedCaptureMap = NULL; 182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = NULL; // will be set later 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; // may be set later 18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fCompiledPat = new UVector64(fDeferredStatus); 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = new UVector32(fDeferredStatus); 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = new UVector(fDeferredStatus); 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = new UnicodeSet; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = new Regex8BitSet; 1901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function 1911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_compareUnicodeString, // Key comparator function 1921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_compareLong, // Value comparator function 1931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert &fDeferredStatus); 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || 1981b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) { 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Slot zero of the vector of sets is reserved. Fill it here. 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets->addElement((int32_t)0, fDeferredStatus); 2051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 2061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // fNamedCaptureMap owns its key strings, type (UnicodeString *) 2071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject); 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// zap Delete everything owned by this RegexPattern. 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::zap() { 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fCompiledPat; 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fCompiledPat = NULL; 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=1; i<fSets->size(); i++) { 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *s; 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s = (UnicodeSet *)fSets->elementAt(i); 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s != NULL) { 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete s; 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fSets; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets = NULL; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete[] fSets8; 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSets8 = NULL; 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fGroupMap; 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGroupMap = NULL; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fInitialChars; 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars = NULL; 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fInitialChars8; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fInitialChars8 = NULL; 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern != NULL) { 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(fPattern); 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = NULL; 24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPatternString != NULL) { 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fPatternString; 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternString = NULL; 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uhash_close(fNamedCaptureMap); 2461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fNamedCaptureMap = NULL; 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Destructor 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::~RegexPattern() { 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru zap(); 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Clone 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruRegexPattern *RegexPattern::clone() const { 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *copy = new RegexPattern(*this); 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return copy; 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// operator == (comparison) Consider to patterns to be == if the 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// pattern strings and the flags are the same. 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Note that pattern strings with the same 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// characters can still be considered different. 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------- 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexPattern::operator ==(const RegexPattern &other) const { 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (this->fPatternString != NULL && other.fPatternString != NULL) { 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *(this->fPatternString) == *(other.fPatternString); 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (this->fPattern == NULL) { 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (other.fPattern == NULL) { 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (other.fPattern != NULL) { 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(this->fPattern, 0); 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(other.fPattern, 0); 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_equals(this->fPattern, other.fPattern); 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// compile 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::compile(const UnicodeString ®ex, 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((flags & ~allFlags) != 0) { 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_INVALID_FLAG; 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 32083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if ((flags & UREGEX_CANON_EQ) != 0) { 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_UNIMPLEMENTED; 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *This = new RegexPattern; 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (This == NULL) { 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_MEMORY_ALLOCATION_ERROR; 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(This->fDeferredStatus)) { 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = This->fDeferredStatus; 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete This; 33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho This->fFlags = flags; 336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexCompile compiler(This, status); 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compiler.compile(regex, pe, status); 339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete This; 34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho This = NULL; 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return This; 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile, UText mode 35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 35250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 35350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) 35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((flags & ~allFlags) != 0) { 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_REGEX_INVALID_FLAG; 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if ((flags & UREGEX_CANON_EQ) != 0) { 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_REGEX_UNIMPLEMENTED; 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *This = new RegexPattern; 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (This == NULL) { 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(This->fDeferredStatus)) { 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = This->fDeferredStatus; 383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete This; 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru This->fFlags = flags; 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexCompile compiler(This, status); 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compiler.compile(regex, pe, status); 390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete This; 393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru This = NULL; 394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return This; 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// compile with default flags. 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern::compile(const UnicodeString ®ex, 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode &err) 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 407c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return compile(regex, 0, pe, err); 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile with default flags, UText mode 41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return compile(regex, 0, pe, err); 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// compile with no UParseErr parameter. 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexPattern * U_EXPORT2 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(const UnicodeString ®ex, 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return compile(regex, flags, pe, err); 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// compile with no UParseErr parameter, UText mode 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern * U_EXPORT2 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexPattern::compile(UText *regex, 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &err) 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return compile(regex, flags, pe, err); 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// flags 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuint32_t RegexPattern::flags() const { 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fFlags; 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matcher(UnicodeString, err) 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher *RegexPattern::matcher(const UnicodeString &input, 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const { 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *retMatcher = matcher(status); 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (retMatcher != NULL) { 46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retMatcher->fDeferredStatus = status; 46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retMatcher->reset(input); 47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retMatcher; 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matcher(status) 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher *RegexPattern::matcher(UErrorCode &status) const { 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *retMatcher = NULL; 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retMatcher = new RegexMatcher(this); 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (retMatcher == NULL) { 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retMatcher; 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// matches Convenience function to test for a match, starting 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// with a pattern string and a data string. 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &input, 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError &pe, 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) {return FALSE;} 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool retVal; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexPattern *pat = NULL; 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher *matcher = NULL; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = RegexPattern::compile(regex, 0, pe, status); 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matcher = pat->matcher(input, status); 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retVal = matcher->matches(status); 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete matcher; 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete pat; 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retVal; 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// matches, UText mode 53050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 53150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool U_EXPORT2 RegexPattern::matches(UText *regex, 53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *input, 53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError &pe, 53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) {return FALSE;} 53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 538b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool retVal = FALSE; 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexPattern *pat = NULL; 54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher *matcher = NULL; 54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 54250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pat = RegexPattern::compile(regex, 0, pe, status); 543b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho matcher = pat->matcher(status); 544b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) { 545b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho matcher->reset(input); 546b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retVal = matcher->matches(status); 547b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete matcher; 55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete pat; 55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retVal; 55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// pattern 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString RegexPattern::pattern() const { 56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPatternString != NULL) { 56550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *fPatternString; 56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (fPattern == NULL) { 56750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UnicodeString(); 56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nativeLen = utext_nativeLength(fPattern); 57150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error 57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result; 573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *resultChars = result.getBuffer(len16); 57650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 57750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result.releaseBuffer(len16); 578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return result; 58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 58850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// patternText 58950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 59050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 59127f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexPattern::patternText(UErrorCode &status) const { 59227f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) {return NULL;} 59327f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR; 59427f654740f2a26ad62a5c155af9199af9e69b889claireho 59550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern != NULL) { 59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fPattern; 59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexStaticSets::initGlobals(&status); 59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return RegexStaticSets::gStaticSets->fEmptyText; 60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//-------------------------------------------------------------------------------- 6051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// 6061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// groupNumberFromName() 6071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// 6081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert//-------------------------------------------------------------------------------- 6091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertint32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const { 6101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(status)) { 6111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return 0; 6121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 6131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 6141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // No need to explicitly check for syntactically valid names. 6151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Invalid ones will never be in the map, and the lookup will fail. 6161b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 6171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t number = uhash_geti(fNamedCaptureMap, &groupName); 6181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (number == 0) { 6191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 6201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 6211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return number; 6221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert} 6231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 6241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertint32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const { 6251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(status)) { 6261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return 0; 6271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 6281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UnicodeString name(groupName, nameLength, US_INV); 6291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return groupNumberFromName(name, status); 6301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert} 6311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 63250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 63350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 63450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// split 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RegexPattern::split(const UnicodeString &input, 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString dest[], 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 64150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) const 64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 64350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 64450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho RegexMatcher m(this); 64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t r = 0; 64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check m's status to make sure all is ok. 65050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(m.fDeferredStatus)) { 65150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho r = m.split(input, dest, destCapacity, status); 65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return r; 65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 65750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// split, UText mode 65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 65950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexPattern::split(UText *input, 66050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest[], 66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) const 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexMatcher m(this); 669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t r = 0; 670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check m's status to make sure all is ok. 671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_SUCCESS(m.fDeferredStatus)) { 672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru r = m.split(input, dest, destCapacity, status); 673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return r; 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// dump Output the compiled form of the pattern. 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Debugging function only. 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------- 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexPattern::dumpOp(int32_t index) const { 686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (void)index; // Suppress warnings in non-debug build. 687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if defined(REGEX_DEBUG) 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char * const opNames[] = {URX_OPCODE_NAMES}; 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t op = fCompiledPat->elementAti(index); 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t val = URX_VAL(op); 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t type = URX_TYPE(op); 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t pinnedType = type; 693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) { 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pinnedType = 0; 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 696c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%4d %08x %-15s ", index, op, opNames[pinnedType]); 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (type) { 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_NOP: 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY: 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY_ALL: 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_FAIL: 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CARET: 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOLLAR: 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_G: 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_X: 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END: 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOLLAR_M: 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CARET_M: 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Types with no operand field of interest. 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 712c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_RESERVED_OP: 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_START_CAPTURE: 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END_CAPTURE: 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATE_SAVE: 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP: 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV: 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV_X: 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_B: 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_BU: 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_D: 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_Z: 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING_LEN: 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT: 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT_NG: 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP: 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP_NG: 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_RELOC_OPRND: 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_SP: 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LD_SP: 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF: 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_INP_LOC: 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMPX: 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_START: 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_END: 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF_I: 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_START: 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_CONT: 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_END: 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_CONT: 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_END: 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_C: 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_DOT_I: 7451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_H: 7461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_R: 7471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_V: 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // types with an integer operand field. 749fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%d", val); 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR: 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR_I: 754fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", val<256?val:'?'); 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING: 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING_I: 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lengthOp = fCompiledPat->elementAti(index+1); 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length = URX_VAL(lengthOp); 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=val; i<val+length; i++) { 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = fLiteralText[i]; 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < 32 || c >= 256) {c = '.';} 767fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", c); 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_SETREF: 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_SR_I: 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(s, TRUE); 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<s.length(); i++) { 779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", s.charAt(i)); 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATIC_SETREF: 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STAT_SETREF_N: 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s; 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (val & URX_NEG_SET) { 789fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("NOT "); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru val &= ~URX_NEG_SET; 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = fStaticSets[val]; 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(s, TRUE); 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<s.length(); i++) { 795fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", s.charAt(i)); 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 802fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("??????"); 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 805fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n"); 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 807fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 810fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid RegexPattern::dumpPattern() const { 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(REGEX_DEBUG) 8121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // TODO: This function assumes an ASCII based charset. 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int index; 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 816fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("Original Pattern: "); 817fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = utext_next32From(fPattern, 0); 81850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 81950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<32 || c>256) { 82050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = '.'; 82150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 822fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", c); 823fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 824fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius c = UTEXT_NEXT32(fPattern); 825fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 826fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n"); 827fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Min Match Length: %d\n", fMinMatchLen); 828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); 829fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (fStartType == START_STRING) { 830fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Initial match string: \""); 831fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) { 832fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", fLiteralText[i]); // TODO: non-printables, surrogates. 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 834fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\"\n"); 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 836fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (fStartType == START_SET) { 837fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t numSetChars = fInitialChars->size(); 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (numSetChars > 20) { 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru numSetChars = 20; 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 841fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" Match First Chars : "); 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<numSetChars; i++) { 843fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = fInitialChars->charAt(i); 844c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (0x20<c && c <0x7e) { 845fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c ", c); 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 847fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%#x ", c); 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (numSetChars < fInitialChars->size()) { 851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" ..."); 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n"); 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (fStartType == START_CHAR) { 856fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf(" First char of Match : "); 857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (0x20 < fInitialChar && fInitialChar<0x7e) { 858fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c\n", fInitialChar); 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 860fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%#x\n", fInitialChar); 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert printf("Named Capture Groups:\n"); 8651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (uhash_count(fNamedCaptureMap) == 0) { 8661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert printf(" None\n"); 8671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 8681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t pos = UHASH_FIRST; 8691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const UHashElement *el = NULL; 8701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) { 8711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const UnicodeString *name = (const UnicodeString *)el->key.pointer; 8721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert char s[100]; 8731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert name->extract(0, 99, s, sizeof(s), US_INV); // capture group names are invariant. 8741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t number = el->value.integer; 8751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert printf(" %d\t%s\n", number, s); 8761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 8771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 8781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 879fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\nIndex Binary Type Operand\n" \ 880fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "-------------------------------------------\n"); 881fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for (index = 0; index<fCompiledPat->size(); index++) { 882fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dumpOp(index); 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 884fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("\n\n"); 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 886fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 894