//
//  regexst.h
//
//  Copyright (C) 2004-2010, International Business Machines Corporation and others.
//  All Rights Reserved.
//
//  This file contains class RegexStaticSets
//
//  This class is internal to the regular expression implementation.
//  For the public Regular Expression API, see the file "unicode/regex.h"
//
//  RegexStaticSets groups together the common UnicodeSets that are needed
//   for compiling or executing RegularExpressions.  This grouping simplifies
//   the thread safe lazy creation and sharing of these sets across
//   all instances of regular expressions.
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uprops.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexcst.h" // Contains state table for the regex pattern parser. 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // generated by a Perl script. 
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Unicode Set pattern strings for all of the required constant sets. 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Initialized with hex values for portability to EBCDIC based machines. 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Really ugly, but there's no good way to avoid it. 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// "Rule Char" Characters are those with no special meaning, and therefore do not 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// need to be escaped to appear as literals in a regexp. Expressed 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// as the inverse of those needing escaping -- [^\*\?\+\[\(\)\{\}\^\$\|\\\.] 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gRuleSet_rule_char_pattern[] = { 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [ ^ \ * \ ? 
\ + \ [ \ ( / ) 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5e, 0x5c, 0x2a, 0x5c, 0x3f, 0x5c, 0x2b, 0x5c, 0x5b, 0x5c, 0x28, 0x5c, 0x29, 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // \ { \ } \ ^ \ $ \ | \ \ \ . ] 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5c, 0x7b,0x5c, 0x7d, 0x5c, 0x5e, 0x5c, 0x24, 0x5c, 0x7c, 0x5c, 0x5c, 0x5c, 0x2e, 0x5d, 0}; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gRuleSet_digit_char_pattern[] = { 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// [ 0 - 9 ] 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x30, 0x2d, 0x39, 0x5d, 0}; 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Here are the backslash escape characters that ICU's unescape() function 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// will handle. 
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gUnescapeCharPattern[] = { 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// [ a c e f n r t u U x ] 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x61, 0x63, 0x65, 0x66, 0x6e, 0x72, 0x74, 0x75, 0x55, 0x78, 0x5d, 0}; 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Unicode Set Definitions for Regular Expression \w 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gIsWordPattern[] = { 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// [ \ p { A l p h a b e t i c } 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5c, 0x70, 0x7b, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x62, 0x65, 0x74, 0x69, 0x63, 0x7d, 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// \ p { M } Mark 79c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0x5c, 0x70, 0x7b, 0x4d, 0x7d, 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// \ p { N d } Digit_Numeric 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5c, 0x70, 0x7b, 0x4e, 0x64, 0x7d, 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// \ p { P c } ] Connector_Punctuation 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5c, 0x70, 0x7b, 0x50, 0x63, 0x7d, 0x5d, 0}; 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Unicode Set Definitions for Regular Expression \s 
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gIsSpacePattern[] = { 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// [ \ p { W h i t e S p a c e } ] 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5c, 0x70, 0x7b, 0x57, 0x68, 0x69, 0x74, 0x65, 0x53, 0x70, 0x61, 0x63, 0x65, 0x7d, 0x5d, 0}; 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// UnicodeSets used in implementation of Grapheme Cluster detection, \X 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gGC_ControlPattern[] = { 98c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// [ [ : Z l : ] [ : Z p : ] 99c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0x5b, 0x5b, 0x3a, 0x5A, 0x6c, 0x3a, 0x5d, 0x5b, 0x3a, 0x5A, 0x70, 0x3a, 0x5d, 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// [ : C c : ] [ : C f : ] - 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x3a, 0x43, 0x63, 0x3a, 0x5d, 0x5b, 0x3a, 0x43, 0x66, 0x3a, 0x5d, 0x2d, 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// [ : G r a p h e m e _ 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x3a, 0x47, 0x72, 0x61, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x5f, 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// E x t e n d : ] ] 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x3a, 0x5d, 0x5d, 0}; 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gGC_ExtendPattern[] = { 
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// [ \ p { G r a p h e m e _ 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5c, 0x70, 0x7b, 0x47, 0x72, 0x61, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x5f, 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// E x t e n d } ] 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x7d, 0x5d, 0}; 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gGC_LPattern[] = { 114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// [ \ p { H a n g u l _ S y l 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5c, 0x70, 0x7b, 0x48, 0x61, 0x6e, 0x67, 0x75, 0x6c, 0x5f, 0x53, 0x79, 0x6c, 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// l a b l e _ T y p e = L } ] 117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x54, 0x79, 0x70, 0x65, 0x3d, 0x4c, 0x7d, 0x5d, 0}; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gGC_VPattern[] = { 120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// [ \ p { H a n g u l _ S y l 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5c, 0x70, 0x7b, 0x48, 0x61, 0x6e, 0x67, 0x75, 0x6c, 0x5f, 0x53, 0x79, 0x6c, 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// l a b l e _ T y p e = V } ] 123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x54, 0x79, 0x70, 0x65, 0x3d, 0x56, 0x7d, 0x5d, 0}; 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gGC_TPattern[] = { 126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// [ \ p { H a n g u l _ S y l 
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5c, 0x70, 0x7b, 0x48, 0x61, 0x6e, 0x67, 0x75, 0x6c, 0x5f, 0x53, 0x79, 0x6c, 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// l a b l e _ T y p e = T } ] 129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x54, 0x79, 0x70, 0x65, 0x3d, 0x54, 0x7d, 0x5d, 0}; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gGC_LVPattern[] = { 132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// [ \ p { H a n g u l _ S y l 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5c, 0x70, 0x7b, 0x48, 0x61, 0x6e, 0x67, 0x75, 0x6c, 0x5f, 0x53, 0x79, 0x6c, 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// l a b l e _ T y p e = L V } ] 135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x54, 0x79, 0x70, 0x65, 0x3d, 0x4c, 0x56, 0x7d, 0x5d, 0}; 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar gGC_LVTPattern[] = { 138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// [ \ p { H a n g u l _ S y l 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x5b, 0x5c, 0x70, 0x7b, 0x48, 0x61, 0x6e, 0x67, 0x75, 0x6c, 0x5f, 0x53, 0x79, 0x6c, 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// l a b l e _ T y p e = L V T } ] 141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x54, 0x79, 0x70, 0x65, 0x3d, 0x4c, 0x56, 0x54, 0x7d, 0x5d, 0}; 142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexStaticSets *RegexStaticSets::gStaticSets = NULL; 
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexStaticSets::RegexStaticSets(UErrorCode *status) 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru: 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerufUnescapeCharSet(UnicodeString(TRUE, gUnescapeCharPattern, -1), *status), 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehofRuleDigitsAlias(NULL), 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehofEmptyText(NULL) 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // First zero out everything 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<URX_LAST_SET; i++) { 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[i] = NULL; 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Then init the sets to their correct values. 
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_ISWORD_SET] = new UnicodeSet(UnicodeString(TRUE, gIsWordPattern, -1), *status); 159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPropSets[URX_ISSPACE_SET] = new UnicodeSet(UnicodeString(TRUE, gIsSpacePattern, -1), *status); 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_EXTEND] = new UnicodeSet(UnicodeString(TRUE, gGC_ExtendPattern, -1), *status); 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_CONTROL] = new UnicodeSet(UnicodeString(TRUE, gGC_ControlPattern, -1), *status); 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_L] = new UnicodeSet(UnicodeString(TRUE, gGC_LPattern, -1), *status); 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_V] = new UnicodeSet(UnicodeString(TRUE, gGC_VPattern, -1), *status); 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_T] = new UnicodeSet(UnicodeString(TRUE, gGC_TPattern, -1), *status); 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_LV] = new UnicodeSet(UnicodeString(TRUE, gGC_LVPattern, -1), *status); 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_LVT] = new UnicodeSet(UnicodeString(TRUE, gGC_LVTPattern, -1), *status); 167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for null pointers 169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fPropSets[URX_ISWORD_SET] == NULL || fPropSets[URX_ISSPACE_SET] == NULL || fPropSets[URX_GC_EXTEND] == NULL || 170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPropSets[URX_GC_CONTROL] == NULL || fPropSets[URX_GC_L] == NULL || fPropSets[URX_GC_V] == NULL || 171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPropSets[URX_GC_T] == NULL || fPropSets[URX_GC_LV] == 
NULL || fPropSets[URX_GC_LVT] == NULL) { 172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto ExitConstrDeleteAll; 173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Bail out if we were unable to create the above sets. 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The rest of the initialization needs them, so we cannot proceed. 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The following sets are dynamically constructed, because their 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // initialization strings would be unreasonable. 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "Normal" is the set of characters that don't need special handling 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // when finding grapheme cluster boundaries. 
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_NORMAL] = new UnicodeSet(0, UnicodeSet::MAX_VALUE); 192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Null pointer check 193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fPropSets[URX_GC_NORMAL] == NULL) { 194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto ExitConstrDeleteAll; 195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_NORMAL]->remove(0xac00, 0xd7a4); 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_NORMAL]->removeAll(*fPropSets[URX_GC_CONTROL]); 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_NORMAL]->removeAll(*fPropSets[URX_GC_L]); 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_NORMAL]->removeAll(*fPropSets[URX_GC_V]); 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[URX_GC_NORMAL]->removeAll(*fPropSets[URX_GC_T]); 201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Initialize the 8-bit fast bit sets from the parallel full 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UnicodeSets. 
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<URX_LAST_SET; i++) { 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fPropSets[i]) { 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[i]->compact(); 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets8[i].init(fPropSets[i]); 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Sets used while parsing rules, but not referenced from the parse state table 212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRuleSets[kRuleSet_rule_char-128] = UnicodeSet(UnicodeString(TRUE, gRuleSet_rule_char_pattern, -1), *status); 213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRuleSets[kRuleSet_digit_char-128] = UnicodeSet(UnicodeString(TRUE, gRuleSet_digit_char_pattern, -1), *status); 214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRuleDigitsAlias = &fRuleSets[kRuleSet_digit_char-128]; 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<(int32_t)(sizeof(fRuleSets)/sizeof(fRuleSets[0])); i++) { 216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRuleSets[i].compact(); 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Finally, initialize an empty string for utility purposes 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fEmptyText = utext_openUChars(NULL, NULL, 0, status); 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; // If we reached this point, everything is fine so just exit 223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruExitConstrDeleteAll: // Remove fPropSets and fRuleSets and return error 225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<URX_LAST_SET; i++) { 226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete fPropSets[i]; 227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPropSets[i] = NULL; 228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexStaticSets::~RegexStaticSets() { 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<URX_LAST_SET; i++) { 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fPropSets[i]; 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPropSets[i] = NULL; 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fRuleDigitsAlias = NULL; 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(fEmptyText); 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 
regex_cleanup Memory cleanup function, free/delete all 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// cached memory. Called by ICU's u_cleanup() function. 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------ 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexStaticSets::cleanup(void) { 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete RegexStaticSets::gStaticSets; 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexStaticSets::gStaticSets = NULL; 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool U_CALLCONV 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruregex_cleanup(void) { 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return RegexStaticSets::cleanup(); 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid RegexStaticSets::initGlobals(UErrorCode *status) { 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RegexStaticSets *p; 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UMTX_CHECK(NULL, gStaticSets, p); 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p == NULL) { 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p = new RegexStaticSets(status); 
271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (p == NULL) { 272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete p; 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(NULL); 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (gStaticSets == NULL) { 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru gStaticSets = p; 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p = NULL; 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(NULL); 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (p) { 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete p; 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucln_i18n_registerCleanup(UCLN_I18N_REGEX, regex_cleanup); 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 295