16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 1999-2013, International Business Machines 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: uniset_props.cpp 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2004aug25 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Character property dependent functions moved here from uniset.cpp 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/parsepos.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uscript.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/symtable.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uset.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/locid.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/brkiter.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uset_imp.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ruleiter.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucln_cmn.h" 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "util.h" 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h" 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uprops.h" 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "propname.h" 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "normalizer2impl.h" 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucase.h" 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ubidi_props.h" 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uinvchar.h" 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uprops.h" 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "charstr.h" 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h" 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "mutex.h" 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "umutex.h" 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h" 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "hash.h" 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_USE 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// initial storage. Must be >= 0 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// *** same as in uniset.cpp ! *** 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define START_EXTRA 16 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Define UChar constants using hex for EBCDIC compatibility 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Used #define to reduce private static exports and memory access time. 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SET_OPEN ((UChar)0x005B) /*[*/ 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SET_CLOSE ((UChar)0x005D) /*]*/ 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define HYPHEN ((UChar)0x002D) /*-*/ 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define COMPLEMENT ((UChar)0x005E) /*^*/ 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define COLON ((UChar)0x003A) /*:*/ 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define BACKSLASH ((UChar)0x005C) /*\*/ 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define INTERSECTION ((UChar)0x0026) /*&*/ 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define UPPER_U ((UChar)0x0055) /*U*/ 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define LOWER_U ((UChar)0x0075) /*u*/ 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define OPEN_BRACE ((UChar)123) /*{*/ 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define CLOSE_BRACE ((UChar)125) /*}*/ 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define UPPER_P ((UChar)0x0050) /*P*/ 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define LOWER_P ((UChar)0x0070) /*p*/ 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define UPPER_N ((UChar)78) /*N*/ 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define EQUALS ((UChar)0x003D) /*=*/ 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:" 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]" 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p" 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}" 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N" 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/ 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Special property set IDs 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const char ANY[] = "ANY"; // [\u0000-\U0010FFFF] 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const char ASCII[] = "ASCII"; // [\u0000-\u007F] 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const char ASSIGNED[] = "Assigned"; // [:^Cn:] 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Unicode name property alias 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define NAME_PROP "na" 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define NAME_PROP_LENGTH 2 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Delimiter string used in patterns to close a category reference: 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ":]". Example: "[:Lu:]". 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */ 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Cached sets ------------------------------------------------------------- *** 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV uset_cleanup(); 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct Inclusion { 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet *fSet; 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UInitOnce fInitOnce; 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions() 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UnicodeSet *uni32Singleton; 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER; 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Inclusions list 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// USetAdder implementation 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Does not use uset.h to reduce code dependencies 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_set_add(USet *set, UChar32 c) { 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((UnicodeSet *)set)->add(c); 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_set_addRange(USet *set, UChar32 start, UChar32 end) { 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((UnicodeSet *)set)->add(start, end); 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_set_addString(USet *set, const UChar *str, int32_t length) { 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Cleanup function for UnicodeSet 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV uset_cleanup(void) { 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(int32_t i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) { 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Inclusion &in = gInclusions[i]; 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete in.fSet; 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org in.fSet = NULL; 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org in.fInitOnce.reset(); 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete uni32Singleton; 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uni32Singleton = NULL; 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uni32InitOnce.reset(); 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgReduce excessive reallocation, and make it easier to detect initialization problems. 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUsually you don't see smaller sets than this for Unicode 5.0. 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define DEFAULT_INCLUSION_CAPACITY 3072 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) { 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This function is invoked only via umtx_initOnce(). 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This function is a friend of class UnicodeSet. 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(src >=0 && src<UPROPS_SRC_COUNT); 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet * &incl = gInclusions[src].fSet; 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(incl == NULL); 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org incl = new UnicodeSet(); 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (incl == NULL) { 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org USetAdder sa = { 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (USet *)incl, 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _set_add, 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _set_addRange, 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _set_addString, 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, // don't need remove() 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL // don't need removeRange() 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }; 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, status); 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(src) { 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_CHAR: 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uchar_addPropertyStarts(&sa, &status); 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_PROPSVEC: 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org upropsvec_addPropertyStarts(&sa, &status); 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_CHAR_AND_PROPSVEC: 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uchar_addPropertyStarts(&sa, &status); 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org upropsvec_addPropertyStarts(&sa, &status); 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_CASE_AND_NORM: { 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org impl->addPropertyStarts(&sa, status); 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status); 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_NFC: { 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org impl->addPropertyStarts(&sa, status); 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_NFKC: { 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(status); 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org impl->addPropertyStarts(&sa, status); 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_NFKC_CF: { 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(status); 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org impl->addPropertyStarts(&sa, status); 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_NFC_CANON_ITER: { 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org impl->addCanonIterPropertyStarts(&sa, status); 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_CASE: 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status); 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UPROPS_SRC_BIDI: 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status); 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INTERNAL_PROGRAM_ERROR; 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete incl; 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org incl = NULL; 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compact for caching 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org incl->compact(); 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(src >=0 && src<UPROPS_SRC_COUNT); 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Inclusion &i = gInclusions[src]; 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_initOnce(i.fInitOnce, &UnicodeSet_initInclusion, src, status); 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return i.fSet; 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Cache some sets for other services -------------------------------------- *** 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid U_CALLCONV createUni32Set(UErrorCode &errorCode) { 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(uni32Singleton == NULL); 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uni32Singleton = new UnicodeSet(UNICODE_STRING_SIMPLE("[:age=3.2:]"), errorCode); 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(uni32Singleton==NULL) { 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uni32Singleton->freeze(); 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC UnicodeSet * 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguniset_getUnicode32Instance(UErrorCode &errorCode) { 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_initOnce(uni32InitOnce, &createUni32Set, errorCode); 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return uni32Singleton; 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// helper functions for matching of pattern syntax pieces ------------------ *** 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// these functions are parallel to the PERL_OPEN etc. strings above 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// using these functions is not only faster than UnicodeString::compare() and 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// caseCompare(), but they also make UnicodeSet work for simple patterns when 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// no Unicode properties data is available - when caseCompare() fails 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline UBool 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisPerlOpen(const UnicodeString &pattern, int32_t pos) { 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c; 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P); 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*static inline UBool 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisPerlClose(const UnicodeString &pattern, int32_t pos) { 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return pattern.charAt(pos)==CLOSE_BRACE; 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/ 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline UBool 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisNameOpen(const UnicodeString &pattern, int32_t pos) { 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N; 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline UBool 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisPOSIXOpen(const UnicodeString &pattern, int32_t pos) { 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON; 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*static inline UBool 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisPOSIXClose(const UnicodeString &pattern, int32_t pos) { 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE; 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/ 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// TODO memory debugging provided inside uniset.cpp 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// could be made available here but probably obsolete with use of modern 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// memory leak checker tools 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define _dbgct(me) 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Constructors &c 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Constructs a set from the given pattern, optionally ignoring 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * white space. See the class description for the syntax of the 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pattern language. 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param pattern a string specifying what characters are in the set 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet::UnicodeSet(const UnicodeString& pattern, 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) : 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFlags(0) 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)){ 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test for NULL */ 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(list == NULL) { 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org allocateStrings(status); 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPattern(pattern, status); 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _dbgct(this); 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Public API 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) { 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Equivalent to 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status); 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // but without dependency on closeOver(). 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ParsePosition pos(0); 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPatternIgnoreSpace(pattern, pos, NULL, status); 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) return *this; 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i = pos.getIndex(); 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Skip over trailing whitespace 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ICU_Utility::skipWhitespace(pattern, i, TRUE); 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i != pattern.length()) { 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ParsePosition& pos, 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SymbolTable* symbols, 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) { 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isFrozen()) { 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_NO_WRITE_PERMISSION; 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Need to build the pattern in a temporary string because 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // _applyPattern calls add() etc., which set pat to empty. 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString rebuiltPat; 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RuleCharacterIterator chars(pattern, symbols, pos); 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status); 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) return; 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (chars.inVariable()) { 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Extra chars in variable value"); 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MALFORMED_SET; 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setPattern(rebuiltPat); 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return true if the given position, in the given pattern, appears 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to be the start of a UnicodeSet pattern. 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) { 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ((pos+1) < pattern.length() && 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.charAt(pos) == (UChar)91/*[*/) || 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resemblesPropertyPattern(pattern, pos); 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Implementation: Pattern parsing 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * A small all-inline class to manage a UnicodeSet pointer. Add 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * operator->() etc. as needed. 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass UnicodeSetPointer { 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet* p; 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpublic: 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inline UnicodeSetPointer() : p(0) {} 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inline ~UnicodeSetPointer() { delete p; } 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inline UnicodeSet* pointer() { return p; } 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inline UBool allocate() { 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (p == 0) { 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p = new UnicodeSet(); 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return p != 0; 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Parse the pattern from the given RuleCharacterIterator. The 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * iterator is advanced over the parsed pattern. 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param chars iterator over the pattern characters. Upon return 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * it will be advanced to the first character after the parsed 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pattern, or the end of the iteration if all characters are 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * parsed. 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param symbols symbol table to use to parse and dereference 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * variables, or null if none. 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param rebuiltPat the pattern that was parsed, rebuilt or 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * copied from the input pattern, as appropriate. 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param options a bit mask of zero or more of the following: 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * IGNORE_SPACE, CASE. 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid UnicodeSet::applyPattern(RuleCharacterIterator& chars, 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SymbolTable* symbols, 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString& rebuiltPat, 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t options, 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& ec) { 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Syntax characters: [ ] ^ - & { } 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Recognized special forms for chars, sets: c-c s-s s&s 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t opts = RuleCharacterIterator::PARSE_VARIABLES | 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RuleCharacterIterator::PARSE_ESCAPES; 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((options & USET_IGNORE_SPACE) != 0) { 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opts |= RuleCharacterIterator::SKIP_WHITESPACE; 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString patLocal, buf; 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool usePat = FALSE; 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSetPointer scratch; 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RuleCharacterIterator::Pos backup; 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // mode: 0=before [, 1=between [...], 2=after ] 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // lastItem: 0=none, 1=char, 2=set 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t lastItem = 0, mode = 0; 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 lastChar = 0; 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar op = 0; 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool invert = FALSE; 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clear(); 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (mode != 2 && !chars.atEnd()) { 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT((lastItem == 0 && op == 0) || 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) || 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ || 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op == INTERSECTION /*'&'*/))); 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = 0; 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool literal = FALSE; 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet* nested = 0; // alias - do not delete 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // -------- Check for property pattern 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t setMode = 0; 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (resemblesPropertyPattern(chars, opts)) { 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setMode = 2; 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // -------- Parse '[' of opening delimiter OR nested set. 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If there is a nested set, use `setMode' to define how 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the set should be parsed. If the '[' is part of the 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // opening delimiter for this pattern, parse special 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // strings "[", "[^", "[-", and "[^-". Check for stand-in 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // characters representing a nested set in the symbol 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // table. 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Prepare to backup if necessary 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.getPos(backup); 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = chars.next(opts, literal, ec); 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x5B /*'['*/ && !literal) { 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mode == 1) { 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.setPos(backup); // backup 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setMode = 1; 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Handle opening '[' delimiter 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mode = 1; 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append((UChar) 0x5B /*'['*/); 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.getPos(backup); // prepare to backup 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = chars.next(opts, literal, ec); 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x5E /*'^'*/ && !literal) { 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org invert = TRUE; 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append((UChar) 0x5E /*'^'*/); 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.getPos(backup); // prepare to backup 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = chars.next(opts, literal, ec); 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fall through to handle special leading '-'; 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // otherwise restart loop for nested [], \p{}, etc. 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == HYPHEN /*'-'*/) { 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org literal = TRUE; 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fall through to handle literal '-' below 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.setPos(backup); // backup 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (symbols != 0) { 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeFunctor *m = symbols->lookupMatcher(c); 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (m != 0) { 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m); 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ms == NULL) { 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // casting away const, but `nested' won't be modified 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (important not to modify stored set) 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nested = const_cast<UnicodeSet*>(ms); 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setMode = 3; 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // -------- Handle a nested set. This either is inline in 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the pattern or represented by a stand-in that has 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // previously been parsed and was looked up in the symbol 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // table. 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (setMode != 0) { 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (lastItem == 1) { 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (op != 0) { 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Char expected after operator"); 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(lastChar, lastChar); 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _appendToPat(patLocal, lastChar, FALSE); 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastItem = 0; 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = 0; 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) { 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append(op); 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (nested == 0) { 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // lazy allocation 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!scratch.allocate()) { 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MEMORY_ALLOCATION_ERROR; 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nested = scratch.pointer(); 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (setMode) { 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec); 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.skipIgnored(opts); 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nested->applyPropertyPattern(chars, patLocal, ec); 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 3: // `nested' already parsed 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nested->_toPattern(patLocal, FALSE); 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org usePat = TRUE; 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mode == 0) { 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Entire pattern is a category; leave parse loop 5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *this = *nested; 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mode = 2; 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (op) { 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case HYPHEN: /*'-'*/ 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org removeAll(*nested); 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case INTERSECTION: /*'&'*/ 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retainAll(*nested); 6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0: 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org addAll(*nested); 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = 0; 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastItem = 2; 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mode == 0) { 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Missing '['"); 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // -------- Parse special (syntax) characters. If the 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // current character is not special, or if it is escaped, 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // then fall through and handle it below. 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!literal) { 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (c) { 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x5D /*']'*/: 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (lastItem == 1) { 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(lastChar, lastChar); 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _appendToPat(patLocal, lastChar, FALSE); 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Treat final trailing '-' as a literal 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (op == HYPHEN /*'-'*/) { 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(op, op); 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append(op); 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (op == INTERSECTION /*'&'*/) { 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Trailing '&'"); 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append((UChar) 0x5D /*']'*/); 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mode = 2; 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case HYPHEN /*'-'*/: 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (op == 0) { 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (lastItem != 0) { 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = (UChar) c; 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Treat final trailing '-' as a literal 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(c, c); 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = chars.next(opts, literal, ec); 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x5D /*']'*/ && !literal) { 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append(HYPHEN_RIGHT_BRACE, 2); 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mode = 2; 6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "'-' not after char or set"); 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case INTERSECTION /*'&'*/: 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (lastItem == 2 && op == 0) { 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = (UChar) c; 6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "'&' not after set"); 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x5E /*'^'*/: 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "'^' not after '['"); 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x7B /*'{'*/: 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (op != 0) { 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Missing operand after operator"); 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (lastItem == 1) { 6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(lastChar, lastChar); 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _appendToPat(patLocal, lastChar, FALSE); 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastItem = 0; 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf.truncate(0); 6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool ok = FALSE; 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (!chars.atEnd()) { 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = chars.next(opts, literal, ec); 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x7D /*'}'*/ && !literal) { 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ok = TRUE; 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf.append(c); 7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (buf.length() < 1 || !ok) { 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Invalid multicharacter string"); 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We have new string. Add it to set and continue; 7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // we don't need to drop through to the further 7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // processing 7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(buf); 7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append((UChar) 0x7B /*'{'*/); 7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _appendToPat(patLocal, buf, FALSE); 7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append((UChar) 0x7D /*'}'*/); 7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case SymbolTable::SYMBOL_REF: 7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // symbols nosymbols 7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [a-$] error error (ambiguous) 7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [a$] anchor anchor 7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [a-$x] var "x"* literal '$' 7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [a-$.] error literal '$' 7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // *We won't get here in the case of var "x" 7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.getPos(backup); 7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = chars.next(opts, literal, ec); 7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool anchor = (c == 0x5D /*']'*/ && !literal); 7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (symbols == 0 && !anchor) { 7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = SymbolTable::SYMBOL_REF; 7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.setPos(backup); 7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; // literal '$' 7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (anchor && op == 0) { 7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (lastItem == 1) { 7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(lastChar, lastChar); 7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _appendToPat(patLocal, lastChar, FALSE); 7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(U_ETHER); 7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org usePat = TRUE; 7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append((UChar) SymbolTable::SYMBOL_REF); 7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append((UChar) 0x5D /*']'*/); 7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mode = 2; 7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Unquoted '$'"); 7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // -------- Parse literal characters. This includes both 7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // escaped chars ("\u4E01") and non-syntax characters 7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // ("a"). 7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (lastItem) { 7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0: 7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastItem = 1; 7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastChar = c; 7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: 7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (op == HYPHEN /*'-'*/) { 7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (lastChar >= c) { 7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Don't allow redundant (a-a) or empty (b-a) ranges; 7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // these are most likely typos. 7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Invalid range"); 7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(lastChar, c); 7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _appendToPat(patLocal, lastChar, FALSE); 7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org patLocal.append(op); 7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _appendToPat(patLocal, c, FALSE); 7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastItem = 0; 7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = 0; 7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(lastChar, lastChar); 7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _appendToPat(patLocal, lastChar, FALSE); 7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastChar = c; 7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: 7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (op != 0) { 7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Set expected after operator"); 7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastChar = c; 7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastItem = 1; 7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mode != 2) { 8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Missing ']'"); 8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.skipIgnored(opts); 8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /** 8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Handle global flags (invert, case insensitivity). If this 8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pattern should be compiled case-insensitive, then we need 8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to close over case BEFORE COMPLEMENTING. This makes 8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * patterns like /[^abc]/i work. 8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((options & USET_CASE_INSENSITIVE) != 0) { 8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (this->*caseClosure)(USET_CASE_INSENSITIVE); 8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if ((options & USET_ADD_CASE_MAPPINGS) != 0) { 8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (this->*caseClosure)(USET_ADD_CASE_MAPPINGS); 8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (invert) { 8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org complement(); 8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Use the rebuilt pattern (patLocal) only if necessary. Prefer the 8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // generated pattern. 8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (usePat) { 8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org rebuiltPat.append(patLocal); 8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _generatePattern(rebuiltPat, FALSE); 8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isBogus() && U_SUCCESS(ec)) { 8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We likely ran out of memory. AHHH! 8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MEMORY_ALLOCATION_ERROR; 8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Property set implementation 8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool numericValueFilter(UChar32 ch, void* context) { 8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return u_getNumericValue(ch) == *(double*)context; 8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool generalCategoryMaskFilter(UChar32 ch, void* context) { 8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t value = *(int32_t*)context; 8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (U_GET_GC_MASK((UChar32) ch) & value) != 0; 8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool versionFilter(UChar32 ch, void* context) { 8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static const UVersionInfo none = { 0, 0, 0, 0 }; 8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVersionInfo v; 8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_charAge(ch, v); 8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVersionInfo* version = (UVersionInfo*)context; 8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0; 8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtypedef struct { 8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UProperty prop; 8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t value; 8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} IntPropertyContext; 8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool intPropertyFilter(UChar32 ch, void* context) { 8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org IntPropertyContext* c = (IntPropertyContext*)context; 8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value; 8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool scriptExtensionsFilter(UChar32 ch, void* context) { 8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return uscript_hasScript(ch, *(UScriptCode*)context); 8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Generic filter-based scanning code for UCD property UnicodeSets. 8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid UnicodeSet::applyFilter(UnicodeSet::Filter filter, 8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void* context, 8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t src, 8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) return; 8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Logically, walk through all Unicode characters, noting the start 8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and end of each range for which filter.contain(c) is 8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // true. Add each range to a set. 8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // To improve performance, use an inclusions set which 8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // encodes information about character ranges that are known 8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to have identical properties. 8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // getInclusions(src) contains exactly the first characters of 8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // same-value ranges for the given properties "source". 8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet* inclusions = getInclusions(src, status); 8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clear(); 8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 startHasProperty = -1; 8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t limitRange = inclusions->getRangeCount(); 8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int j=0; j<limitRange; ++j) { 9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // get current range 9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 start = inclusions->getRangeStart(j); 9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 end = inclusions->getRangeEnd(j); 9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // for all the code points in the range, process 9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (UChar32 ch = start; ch <= end; ++ch) { 9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // only add to this UnicodeSet on inflection points -- 9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // where the hasProperty value changes to false 9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((*filter)(ch, context)) { 9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startHasProperty < 0) { 9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startHasProperty = ch; 9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (startHasProperty >= 0) { 9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(startHasProperty, ch-1); 9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startHasProperty = -1; 9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startHasProperty >= 0) { 9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add((UChar32)startHasProperty, (UChar32)0x10FFFF); 9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isBogus() && U_SUCCESS(status)) { 9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We likely ran out of memory. AHHH! 9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { 9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Note: we use ' ' in compiler code page */ 9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t j = 0; 9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char ch; 9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --dstCapacity; /* make room for term. zero */ 9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while ((ch = *src++) != 0) { 9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch == ' ' && (j==0 || (j>0 && dst[j-1]==' '))) { 9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (j >= dstCapacity) return FALSE; 9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dst[j++] = ch; 9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (j > 0 && dst[j-1] == ' ') --j; 9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dst[j] = 0; 9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Property set API 9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define FAIL(ec) {ec=U_ILLEGAL_ARGUMENT_ERROR; return *this;} 9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& 9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) { 9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec) || isFrozen()) return *this; 9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (prop == UCHAR_GENERAL_CATEGORY_MASK) { 9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec); 9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (prop == UCHAR_SCRIPT_EXTENSIONS) { 9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UScriptCode script = (UScriptCode)value; 9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyFilter(scriptExtensionsFilter, &script, UPROPS_SRC_PROPSVEC, ec); 9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org IntPropertyContext c = {prop, value}; 9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec); 9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& 9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet::applyPropertyAlias(const UnicodeString& prop, 9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& value, 9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& ec) { 9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec) || isFrozen()) return *this; 9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // prop and value used to be converted to char * using the default 9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // converter instead of the invariant conversion. 9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This should not be necessary because all Unicode property and value 9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // names use only invariant characters. 9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If there are any variant characters, then we won't find them anyway. 9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Checking first avoids assertion failures in the conversion. 9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( !uprv_isInvariantUString(prop.getBuffer(), prop.length()) || 9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org !uprv_isInvariantUString(value.getBuffer(), value.length()) 9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CharString pname, vname; 9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pname.appendInvariantChars(prop, ec); 9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org vname.appendInvariantChars(value, ec); 9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return *this; 9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UProperty p; 9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t v; 9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool mustNotBeEmpty = FALSE, invert = FALSE; 9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (value.length() > 0) { 9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p = u_getPropertyEnum(pname.data()); 9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (p == UCHAR_INVALID_CODE) FAIL(ec); 9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Treat gc as gcm 9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (p == UCHAR_GENERAL_CATEGORY) { 9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p = UCHAR_GENERAL_CATEGORY_MASK; 9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) || 10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) || 10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) { 10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org v = u_getPropertyValueEnum(p, vname.data()); 10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (v == UCHAR_INVALID_CODE) { 10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Handle numeric CCC 10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (p == UCHAR_CANONICAL_COMBINING_CLASS || 10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS || 10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) { 10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char* end; 10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org double value = uprv_strtod(vname.data(), &end); 10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org v = (int32_t) value; 10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (v != value || v < 0 || *end != 0) { 10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // non-integral or negative value, or trailing junk 10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the resultant set is empty then the numeric value 10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // was invalid. 10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mustNotBeEmpty = TRUE; 10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (p) { 10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_NUMERIC_VALUE: 10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char* end; 10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org double value = uprv_strtod(vname.data(), &end); 10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*end != 0) { 10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec); 10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_NAME: 10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Must munge name, since u_charFromName() does not do 10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 'loose' matching. 10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength 10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); 10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec); 10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(ec)) { 10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clear(); 10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org add(ch); 10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_UNICODE_1_NAME: 10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // ICU 49 deprecates the Unicode_1_Name property APIs. 10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_AGE: 10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Must munge name, since u_versionFromString() does not do 10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 'loose' matching. 10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char buf[128]; 10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); 10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVersionInfo version; 10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_versionFromString(version, buf); 10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyFilter(versionFilter, &version, UPROPS_SRC_PROPSVEC, ec); 10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_SCRIPT_EXTENSIONS: 10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data()); 10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (v == UCHAR_INVALID_CODE) { 10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // fall through to calling applyIntPropertyValue() 10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // p is a non-binary, non-enumerated property that we 10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // don't support (yet). 10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // value is empty. Interpret as General Category, Script, or 10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Binary property. 10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p = UCHAR_GENERAL_CATEGORY_MASK; 10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org v = u_getPropertyValueEnum(p, pname.data()); 10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (v == UCHAR_INVALID_CODE) { 10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p = UCHAR_SCRIPT; 10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org v = u_getPropertyValueEnum(p, pname.data()); 10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (v == UCHAR_INVALID_CODE) { 10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p = u_getPropertyEnum(pname.data()); 10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) { 10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org v = 1; 10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (0 == uprv_comparePropertyNames(ANY, pname.data())) { 10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set(MIN_VALUE, MAX_VALUE); 10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (0 == uprv_comparePropertyNames(ASCII, pname.data())) { 10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set(0, 0x7F); 11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (0 == uprv_comparePropertyNames(ASSIGNED, pname.data())) { 11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [:Assigned:]=[:^Cn:] 11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p = UCHAR_GENERAL_CATEGORY_MASK; 11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org v = U_GC_CN_MASK; 11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org invert = TRUE; 11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyIntPropertyValue(p, v, ec); 11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(invert) { 11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org complement(); 11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(ec) && (mustNotBeEmpty && isEmpty())) { 11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // mustNotBeEmpty is set to true if an empty set indicates 11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // invalid input. 11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_ILLEGAL_ARGUMENT_ERROR; 11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isBogus() && U_SUCCESS(ec)) { 11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We likely ran out of memory. AHHH! 11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MEMORY_ALLOCATION_ERROR; 11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Property set patterns 11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return true if the given position, in the given pattern, appears 11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to be the start of a property set pattern. 11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern, 11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t pos) { 11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Patterns are at least 5 characters long 11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((pos+5) > pattern.length()) { 11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look for an opening [:, [:^, \p, or \P 11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return isPOSIXOpen(pattern, pos) || isPerlOpen(pattern, pos) || isNameOpen(pattern, pos); 11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return true if the given iterator appears to point at a 11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * property pattern. Regardless of the result, return with the 11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * iterator unchanged. 11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param chars iterator over the pattern characters. Upon return 11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * it will be unchanged. 11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param iterOpts RuleCharacterIterator options 11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars, 11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t iterOpts) { 11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // NOTE: literal will always be FALSE, because we don't parse escapes. 11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool result = FALSE, literal; 11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode ec = U_ZERO_ERROR; 11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org iterOpts &= ~RuleCharacterIterator::PARSE_ESCAPES; 11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RuleCharacterIterator::Pos pos; 11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.getPos(pos); 11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = chars.next(iterOpts, literal, ec); 11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) { 11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE, 11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org literal, ec); 11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) : 11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/); 11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.setPos(pos); 11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result && U_SUCCESS(ec); 11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Parse the given property pattern at the given parse position. 11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern, 11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ParsePosition& ppos, 11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &ec) { 11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t pos = ppos.getIndex(); 11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool posix = FALSE; // true for [:pat:], false for \p{pat} \P{pat} \N{pat} 11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isName = FALSE; // true for \N{pat}, o/w false 11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool invert = FALSE; 11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return *this; 11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Minimum length is 5 characters, e.g. \p{L} 11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((pos+5) > pattern.length()) { 11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // On entry, ppos should point to one of the following locations: 11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look for an opening [:, [:^, \p, or \P 11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isPOSIXOpen(pattern, pos)) { 11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org posix = TRUE; 12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos += 2; 12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos = ICU_Utility::skipWhitespace(pattern, pos); 12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) { 12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++pos; 12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org invert = TRUE; 12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) { 12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c = pattern.charAt(pos+1); 12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org invert = (c == UPPER_P); 12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isName = (c == UPPER_N); 12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos += 2; 12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos = ICU_Utility::skipWhitespace(pattern, pos); 12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) { 12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Syntax error; "\p" or "\P" not followed by "{" 12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Open delimiter not seen 12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look for the matching close delimiter, either :] or } 12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t close; 12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (posix) { 12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org close = pattern.indexOf(POSIX_CLOSE, 2, pos); 12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org close = pattern.indexOf(CLOSE_BRACE, pos); 12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (close < 0) { 12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Syntax error; close delimiter missing 12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FAIL(ec); 12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look for an '=' sign. If this is present, we will parse a 12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // medium \p{gc=Cf} or long \p{GeneralCategory=Format} 12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // pattern. 12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t equals = pattern.indexOf(EQUALS, pos); 12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString propName, valueName; 12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (equals >= 0 && equals < close && !isName) { 12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Equals seen; parse medium/long pattern 12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.extractBetween(pos, equals, propName); 12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.extractBetween(equals+1, close, valueName); 12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Handle case where no '=' is seen, and \N{} 12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pattern.extractBetween(pos, close, propName); 12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Handle \N{name} 12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isName) { 12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This is a little inefficient since it means we have to 12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // parse NAME_PROP back to UCHAR_NAME even though we already 12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // know it's UCHAR_NAME. If we refactor the API to 12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // support args of (UProperty, char*) then we can remove 12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // NAME_PROP and make this a little more efficient. 12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org valueName = propName; 12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org propName = UnicodeString(NAME_PROP, NAME_PROP_LENGTH, US_INV); 12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPropertyAlias(propName, valueName, ec); 12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(ec)) { 12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (invert) { 12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org complement(); 12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Move to the limit position after the close delimiter if the 12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // parse succeeded. 12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ppos.setIndex(close + (posix ? 2 : 1)); 12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Parse a property pattern. 12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param chars iterator over the pattern characters. Upon return 12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * it will be advanced to the first character after the parsed 12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pattern, or the end of the iteration if all characters are 12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * parsed. 12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param rebuiltPat the pattern that was parsed, rebuilt or 12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * copied from the input pattern, as appropriate. 12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars, 12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString& rebuiltPat, 12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& ec) { 12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString pattern; 12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.lookahead(pattern); 12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ParsePosition pos(0); 12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPropertyPattern(pattern, pos, ec); 12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(ec)) return; 12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos.getIndex() == 0) { 12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Invalid property pattern"); 12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ec = U_MALFORMED_SET; 12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars.jumpahead(pos.getIndex()); 12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org rebuiltPat.append(pattern, 0, pos.getIndex()); 13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 1303