18393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/* 28393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius********************************************************************** 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2012-2014, International Business Machines 48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Corporation and others. All Rights Reserved. 58393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius********************************************************************** 68393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius*/ 78393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 88393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utypes.h" 98393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uchar.h" 118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utf16.h" 128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "identifier_info.h" 148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "mutex.h" 158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "scriptset.h" 168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "ucln_in.h" 178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "uvector.h" 188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_NAMESPACE_BEGIN 208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UnicodeSet *ASCII; 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic ScriptSet *JAPANESE; 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic ScriptSet *CHINESE; 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic ScriptSet *KOREAN; 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic ScriptSet *CONFUSABLE_WITH_LATIN; 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER; 278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_BEGIN 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UBool U_CALLCONV 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusIdentifierInfo_cleanup(void) { 328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete ASCII; 338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ASCII = NULL; 348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete JAPANESE; 358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius JAPANESE = NULL; 368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete CHINESE; 378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius CHINESE = NULL; 388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete KOREAN; 398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius KOREAN = NULL; 408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete CONFUSABLE_WITH_LATIN; 418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius CONFUSABLE_WITH_LATIN = NULL; 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius gIdentifierInfoInitOnce.reset(); 438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return TRUE; 448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic void U_CALLCONV 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusIdentifierInfo_init(UErrorCode &status) { 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ASCII = new UnicodeSet(0, 0x7f); 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius JAPANESE = new ScriptSet(); 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CHINESE = new ScriptSet(); 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius KOREAN = new ScriptSet(); 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CONFUSABLE_WITH_LATIN = new ScriptSet(); 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius || CONFUSABLE_WITH_LATIN == NULL) { 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_MEMORY_ALLOCATION_ERROR; 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ASCII->freeze(); 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status) 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius .set(USCRIPT_KATAKANA, status); 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status); 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status); 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status) 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius .set(USCRIPT_CHEROKEE, status); 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup); 668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_CDECL_END 688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo::IdentifierInfo(UErrorCode &status): 718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL), 728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) { 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status); 748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return; 768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fIdentifier = new UnicodeString(); 798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRequiredScripts = new ScriptSet(); 808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status); 818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet); 828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates = new ScriptSet(); 838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fNumerics = new UnicodeSet(); 848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fIdentifierProfile = new UnicodeSet(0, 0x10FFFF); 858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL || 878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) { 888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo::~IdentifierInfo() { 938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fIdentifier; 948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fRequiredScripts; 958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_close(fScriptSetSet); 968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fCommonAmongAlternates; 978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fNumerics; 988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fIdentifierProfile; 998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::clear() { 1038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRequiredScripts->resetAll(); 1048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_removeAll(fScriptSetSet); 1058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fNumerics->clear(); 1068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates->resetAll(); 1078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 1088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) { 1128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *fIdentifierProfile = identifierProfile; 1138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 1148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeSet &IdentifierInfo::getIdentifierProfile() const { 1188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *fIdentifierProfile; 1198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 1228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) { 1238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 1248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 1258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius *fIdentifier = identifier; 1278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius clear(); 1288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet scriptsForCP; 1298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UChar32 cp; 1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) { 1318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius cp = identifier.char32At(i); 1328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Store a representative character for each kind of decimal digit 1338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) { 1348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value 1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fNumerics->add(cp - (UChar32)u_getNumericValue(cp)); 1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UScriptCode extensions[500]; 138f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status); 1398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 1408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 1418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptsForCP.resetAll(); 1438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t j=0; j<extensionsCount; j++) { 1448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptsForCP.set(extensions[j], status); 1458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptsForCP.reset(USCRIPT_COMMON, status); 1478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptsForCP.reset(USCRIPT_INHERITED, status); 1488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius switch (scriptsForCP.countMembers()) { 1498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case 0: break; 1508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case 1: 1518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Single script, record it. 1528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRequiredScripts->Union(scriptsForCP); 1538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius default: 1558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!fRequiredScripts->intersects(scriptsForCP) 1568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius && !uhash_geti(fScriptSetSet, &scriptsForCP)) { 1578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // If the set hasn't been added already, add it 1588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // (Add a copy, fScriptSetSet takes ownership of the copy.) 1598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status); 1608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Now make a final pass through ScriptSetSet to remove alternates that came before singles. 1658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // [Kana], [Kana Hira] => [Kana] 1668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // This is relatively infrequent, so doesn't have to be optimized. 1678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // We also compute any commonalities among the alternates. 1688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (uhash_count(fScriptSetSet) > 0) { 1698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates->setAll(); 1701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert for (int32_t it = UHASH_FIRST;;) { 1718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it); 1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (nextHashEl == NULL) { 1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer); 1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // [Kana], [Kana Hira] => [Kana] 1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (fRequiredScripts->intersects(*next)) { 1788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_removeElement(fScriptSetSet, nextHashEl); 1798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } else { 1808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates->intersect(*next); 1818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]] 1821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert for (int32_t otherIt = UHASH_FIRST;;) { 1838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt); 1848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (otherHashEl == NULL) { 1858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer); 1888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (next != other && next->contains(*other)) { 1898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius uhash_removeElement(fScriptSetSet, nextHashEl); 1908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (uhash_count(fScriptSetSet) == 0) { 1978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fCommonAmongAlternates->resetAll(); 1988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return *this; 2008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeString *IdentifierInfo::getIdentifier() const { 2048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fIdentifier; 2058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst ScriptSet *IdentifierInfo::getScripts() const { 2088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fRequiredScripts; 2098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UHashtable *IdentifierInfo::getAlternates() const { 2128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fScriptSetSet; 2138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeSet *IdentifierInfo::getNumerics() const { 2178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fNumerics; 2188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst ScriptSet *IdentifierInfo::getCommonAmongAlternates() const { 2218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return fCommonAmongAlternates; 2228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#if !UCONFIG_NO_NORMALIZATION 2258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusURestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const { 2278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!fIdentifierProfile->containsAll(*fIdentifier) || getNumerics()->size() > 1) { 2288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_UNRESTRICTIVE; 2298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (ASCII->containsAll(*fIdentifier)) { 2318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_ASCII; 2328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // This is a bit tricky. We look at a number of factors. 2348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // The number of scripts in the text. 2358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc]) 2368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.) 2378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the 2398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // time it is created, in setIdentifier(). 2408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius int32_t cardinalityPlus = fRequiredScripts->countMembers() + 2418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); 2428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (cardinalityPlus < 2) { 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return USPOOF_SINGLE_SCRIPT_RESTRICTIVE; 2448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts) 2468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius || containsWithAlternates(*KOREAN, *fRequiredScripts)) { 2478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_HIGHLY_RESTRICTIVE; 2488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (cardinalityPlus == 2 && 2508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRequiredScripts->test(USCRIPT_LATIN, status) && 2518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) { 2528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_MODERATELY_RESTRICTIVE; 2538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USPOOF_MINIMALLY_RESTRICTIVE; 2558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif /* !UCONFIG_NO_NORMALIZATION */ 2588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t IdentifierInfo::getScriptCount() const { 2608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Note: Common and Inherited scripts were removed by setIdentifier(), and do not appear in fRequiredScripts. 2618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius int32_t count = fRequiredScripts->countMembers() + 2628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); 2638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return count; 2648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const { 2698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!container.contains(containee)) { 2708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return FALSE; 2718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert for (int32_t iter = UHASH_FIRST; ;) { 2738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UHashElement *hashEl = uhash_nextElement(fScriptSetSet, &iter); 2748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (hashEl == NULL) { 2758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 2768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *alternatives = static_cast<ScriptSet *>(hashEl->key.pointer); 2788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (!container.intersects(*alternatives)) { 2798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return false; 2808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return true; 2838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 2848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 2858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) { 2868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UVector sorted(status); 2878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 2888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return dest; 2898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert for (int32_t pos = UHASH_FIRST; ;) { 2918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UHashElement *el = uhash_nextElement(alternates, &pos); 2928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (el == NULL) { 2938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 2948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *ss = static_cast<ScriptSet *>(el->key.pointer); 2968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius sorted.addElement(ss, status); 2978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 2988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius sorted.sort(uhash_compareScriptSet, status); 2998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeString separator = UNICODE_STRING_SIMPLE("; "); 3008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t i=0; i<sorted.size(); i++) { 3018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (i>0) { 3028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius dest.append(separator); 3038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 3048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i)); 3058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ss->displayScripts(dest); 3068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 3078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return dest; 3088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 3098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 3108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_NAMESPACE_END 3118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 312