18393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/* 28393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius********************************************************************** 38393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 2012-2013, International Business Machines 48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Corporation and others. All Rights Reserved. 58393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius********************************************************************** 68393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius*/ 78393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 88393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utypes.h" 98393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uchar.h" 118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utf16.h" 128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "identifier_info.h" 148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "mutex.h" 158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "scriptset.h" 168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "ucln_in.h" 178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "uvector.h" 188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_NAMESPACE_BEGIN 208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic UMutex gInitMutex = U_MUTEX_INITIALIZER; 248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic UBool gStaticsAreInitialized = FALSE; 258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUnicodeSet *IdentifierInfo::ASCII; 278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusScriptSet *IdentifierInfo::JAPANESE; 288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusScriptSet *IdentifierInfo::CHINESE; 298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusScriptSet *IdentifierInfo::KOREAN; 308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN; 318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUBool IdentifierInfo::cleanup() { 338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete ASCII; 348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ASCII = NULL; 358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete JAPANESE; 368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius JAPANESE = NULL; 378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete CHINESE; 388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius CHINESE = NULL; 398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete KOREAN; 408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius KOREAN = NULL; 418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete CONFUSABLE_WITH_LATIN; 428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius CONFUSABLE_WITH_LATIN = NULL; 438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius gStaticsAreInitialized = FALSE; 448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return TRUE; 458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_CDECL_BEGIN 488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic UBool U_CALLCONV 498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo_cleanup(void) { 508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return IdentifierInfo::cleanup(); 518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_CDECL_END 538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo::IdentifierInfo(UErrorCode &status): 568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL), 578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) { 588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return; 608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius { 628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius Mutex lock(&gInitMutex); 638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!gStaticsAreInitialized) { 648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ASCII = new UnicodeSet(0, 0x7f); 658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius JAPANESE = new ScriptSet(); 668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius CHINESE = new ScriptSet(); 678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius KOREAN = new ScriptSet(); 688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius CONFUSABLE_WITH_LATIN = new ScriptSet(); 698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL 708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius || CONFUSABLE_WITH_LATIN == NULL) { 718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return; 738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ASCII->freeze(); 758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status) 768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius .set(USCRIPT_KATAKANA, status); 778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status); 788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status); 798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status) 808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius .set(USCRIPT_CHEROKEE, status); 818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup); 828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius gStaticsAreInitialized = TRUE; 838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fIdentifier = new UnicodeString(); 868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRequiredScripts = new ScriptSet(); 878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status); 888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet); 898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates = new ScriptSet(); 908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fNumerics = new UnicodeSet(); 918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fIdentifierProfile = new UnicodeSet(0, 0x10FFFF); 928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL || 948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) { 958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo::~IdentifierInfo() { 1008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fIdentifier; 1018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fRequiredScripts; 1028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_close(fScriptSetSet); 1038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fCommonAmongAlternates; 1048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fNumerics; 1058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fIdentifierProfile; 1068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::clear() { 1108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRequiredScripts->resetAll(); 1118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_removeAll(fScriptSetSet); 1128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fNumerics->clear(); 1138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates->resetAll(); 1148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 1158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) { 1198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *fIdentifierProfile = identifierProfile; 1208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 1218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeSet &IdentifierInfo::getIdentifierProfile() const { 1258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *fIdentifierProfile; 1268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) { 1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 1318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 1328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *fIdentifier = identifier; 1348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius clear(); 1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet scriptsForCP; 1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UChar32 cp; 1378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) { 1388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius cp = identifier.char32At(i); 1398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Store a representative character for each kind of decimal digit 1408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) { 1418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value 1428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fNumerics->add(cp - (UChar32)u_getNumericValue(cp)); 1438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UScriptCode extensions[500]; 1458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, LENGTHOF(extensions), &status); 1468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 1478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 1488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptsForCP.resetAll(); 1508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t j=0; j<extensionsCount; j++) { 1518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptsForCP.set(extensions[j], status); 1528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptsForCP.reset(USCRIPT_COMMON, status); 1548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptsForCP.reset(USCRIPT_INHERITED, status); 1558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius switch (scriptsForCP.countMembers()) { 1568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case 0: break; 1578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case 1: 1588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Single script, record it. 1598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRequiredScripts->Union(scriptsForCP); 1608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius default: 1628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!fRequiredScripts->intersects(scriptsForCP) 1638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius && !uhash_geti(fScriptSetSet, &scriptsForCP)) { 1648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // If the set hasn't been added already, add it 1658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // (Add a copy, fScriptSetSet takes ownership of the copy.) 1668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status); 1678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Now make a final pass through ScriptSetSet to remove alternates that came before singles. 1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // [Kana], [Kana Hira] => [Kana] 1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // This is relatively infrequent, so doesn't have to be optimized. 1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // We also compute any commonalities among the alternates. 1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (uhash_count(fScriptSetSet) > 0) { 1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates->setAll(); 1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t it = -1;;) { 1788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it); 1798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (nextHashEl == NULL) { 1808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer); 1838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // [Kana], [Kana Hira] => [Kana] 1848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (fRequiredScripts->intersects(*next)) { 1858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_removeElement(fScriptSetSet, nextHashEl); 1868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } else { 1878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates->intersect(*next); 1888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]] 1898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t otherIt = -1;;) { 1908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt); 1918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (otherHashEl == NULL) { 1928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer); 1958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (next != other && next->contains(*other)) { 1968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_removeElement(fScriptSetSet, nextHashEl); 1978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (uhash_count(fScriptSetSet) == 0) { 2048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates->resetAll(); 2058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 2078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeString *IdentifierInfo::getIdentifier() const { 2118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fIdentifier; 2128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst ScriptSet *IdentifierInfo::getScripts() const { 2158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fRequiredScripts; 2168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UHashtable *IdentifierInfo::getAlternates() const { 2198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fScriptSetSet; 2208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeSet *IdentifierInfo::getNumerics() const { 2248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fNumerics; 2258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst ScriptSet *IdentifierInfo::getCommonAmongAlternates() const { 2288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fCommonAmongAlternates; 2298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#if !UCONFIG_NO_NORMALIZATION 2328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusURestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const { 2348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!fIdentifierProfile->containsAll(*fIdentifier) || getNumerics()->size() > 1) { 2358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_UNRESTRICTIVE; 2368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (ASCII->containsAll(*fIdentifier)) { 2388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_ASCII; 2398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // This is a bit tricky. We look at a number of factors. 2418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // The number of scripts in the text. 2428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc]) 2438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.) 2448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the 2468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // time it is created, in setIdentifier(). 2478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius int32_t cardinalityPlus = fRequiredScripts->countMembers() + 2488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); 2498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (cardinalityPlus < 2) { 2508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_HIGHLY_RESTRICTIVE; 2518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts) 2538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius || containsWithAlternates(*KOREAN, *fRequiredScripts)) { 2548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_HIGHLY_RESTRICTIVE; 2558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (cardinalityPlus == 2 && 2578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRequiredScripts->test(USCRIPT_LATIN, status) && 2588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) { 2598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_MODERATELY_RESTRICTIVE; 2608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_MINIMALLY_RESTRICTIVE; 2628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif /* !UCONFIG_NO_NORMALIZATION */ 2658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t IdentifierInfo::getScriptCount() const { 2678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Note: Common and Inherited scripts were removed by setIdentifier(), and do not appear in fRequiredScripts. 2688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius int32_t count = fRequiredScripts->countMembers() + 2698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); 2708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return count; 2718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const { 2768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!container.contains(containee)) { 2778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return FALSE; 2788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t iter = -1; ;) { 2808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UHashElement *hashEl = uhash_nextElement(fScriptSetSet, &iter); 2818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (hashEl == NULL) { 2828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 2838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *alternatives = static_cast<ScriptSet *>(hashEl->key.pointer); 2858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!container.intersects(*alternatives)) { 2868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return false; 2878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return true; 2908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) { 2938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UVector sorted(status); 2948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 2958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return dest; 2968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t pos = -1; ;) { 2988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UHashElement *el = uhash_nextElement(alternates, &pos); 2998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (el == NULL) { 3008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 3018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 3028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *ss = static_cast<ScriptSet *>(el->key.pointer); 3038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius sorted.addElement(ss, status); 3048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 3058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius sorted.sort(uhash_compareScriptSet, status); 3068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeString separator = UNICODE_STRING_SIMPLE("; "); 3078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t i=0; i<sorted.size(); i++) { 3088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (i>0) { 3098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius dest.append(separator); 3108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 3118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i)); 3128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ss->displayScripts(dest); 3138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 3148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return dest; 3158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 3168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 3178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_NAMESPACE_END 3188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 319