16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org********************************************************************** 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2012-2013, International Business Machines 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org********************************************************************** 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h" 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "identifier_info.h" 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "mutex.h" 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "scriptset.h" 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucln_in.h" 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h" 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UMutex gInitMutex = U_MUTEX_INITIALIZER; 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool gStaticsAreInitialized = FALSE; 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet *IdentifierInfo::ASCII; 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgScriptSet *IdentifierInfo::JAPANESE; 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgScriptSet *IdentifierInfo::CHINESE; 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgScriptSet *IdentifierInfo::KOREAN; 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN; 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool IdentifierInfo::cleanup() { 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete ASCII; 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ASCII = NULL; 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete JAPANESE; 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org JAPANESE = NULL; 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete CHINESE; 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CHINESE = NULL; 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete KOREAN; 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org KOREAN = NULL; 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete CONFUSABLE_WITH_LATIN; 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CONFUSABLE_WITH_LATIN = NULL; 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gStaticsAreInitialized = FALSE; 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo_cleanup(void) { 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return IdentifierInfo::cleanup(); 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo::IdentifierInfo(UErrorCode &status): 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL), 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) { 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Mutex lock(&gInitMutex); 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!gStaticsAreInitialized) { 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ASCII = new UnicodeSet(0, 0x7f); 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org JAPANESE = new ScriptSet(); 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CHINESE = new ScriptSet(); 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org KOREAN = new ScriptSet(); 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CONFUSABLE_WITH_LATIN = new ScriptSet(); 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org || CONFUSABLE_WITH_LATIN == NULL) { 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ASCII->freeze(); 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status) 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org .set(USCRIPT_KATAKANA, status); 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status); 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status); 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status) 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org .set(USCRIPT_CHEROKEE, status); 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup); 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gStaticsAreInitialized = TRUE; 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fIdentifier = new UnicodeString(); 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequiredScripts = new ScriptSet(); 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status); 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet); 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCommonAmongAlternates = new ScriptSet(); 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fNumerics = new UnicodeSet(); 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fIdentifierProfile = new UnicodeSet(0, 0x10FFFF); 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL || 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) { 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo::~IdentifierInfo() { 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fIdentifier; 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fRequiredScripts; 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_close(fScriptSetSet); 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fCommonAmongAlternates; 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fNumerics; 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fIdentifierProfile; 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo &IdentifierInfo::clear() { 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequiredScripts->resetAll(); 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_removeAll(fScriptSetSet); 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fNumerics->clear(); 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCommonAmongAlternates->resetAll(); 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) { 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *fIdentifierProfile = identifierProfile; 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UnicodeSet &IdentifierInfo::getIdentifierProfile() const { 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *fIdentifierProfile; 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) { 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *fIdentifier = identifier; 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clear(); 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ScriptSet scriptsForCP; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 cp; 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) { 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cp = identifier.char32At(i); 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Store a representative character for each kind of decimal digit 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) { 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fNumerics->add(cp - (UChar32)u_getNumericValue(cp)); 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UScriptCode extensions[500]; 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, LENGTHOF(extensions), &status); 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org scriptsForCP.resetAll(); 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t j=0; j<extensionsCount; j++) { 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org scriptsForCP.set(extensions[j], status); 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org scriptsForCP.reset(USCRIPT_COMMON, status); 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org scriptsForCP.reset(USCRIPT_INHERITED, status); 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (scriptsForCP.countMembers()) { 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0: break; 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Single script, record it. 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequiredScripts->Union(scriptsForCP); 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!fRequiredScripts->intersects(scriptsForCP) 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org && !uhash_geti(fScriptSetSet, &scriptsForCP)) { 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the set hasn't been added already, add it 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Add a copy, fScriptSetSet takes ownership of the copy.) 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status); 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Now make a final pass through ScriptSetSet to remove alternates that came before singles. 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [Kana], [Kana Hira] => [Kana] 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This is relatively infrequent, so doesn't have to be optimized. 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We also compute any commonalities among the alternates. 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (uhash_count(fScriptSetSet) > 0) { 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCommonAmongAlternates->setAll(); 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t it = -1;;) { 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it); 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (nextHashEl == NULL) { 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer); 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [Kana], [Kana Hira] => [Kana] 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fRequiredScripts->intersects(*next)) { 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_removeElement(fScriptSetSet, nextHashEl); 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCommonAmongAlternates->intersect(*next); 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]] 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t otherIt = -1;;) { 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt); 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (otherHashEl == NULL) { 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer); 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (next != other && next->contains(*other)) { 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_removeElement(fScriptSetSet, nextHashEl); 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (uhash_count(fScriptSetSet) == 0) { 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCommonAmongAlternates->resetAll(); 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UnicodeString *IdentifierInfo::getIdentifier() const { 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fIdentifier; 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst ScriptSet *IdentifierInfo::getScripts() const { 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fRequiredScripts; 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UHashtable *IdentifierInfo::getAlternates() const { 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fScriptSetSet; 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UnicodeSet *IdentifierInfo::getNumerics() const { 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fNumerics; 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst ScriptSet *IdentifierInfo::getCommonAmongAlternates() const { 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fCommonAmongAlternates; 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgURestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const { 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!fIdentifierProfile->containsAll(*fIdentifier) || getNumerics()->size() > 1) { 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return USPOOF_UNRESTRICTIVE; 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ASCII->containsAll(*fIdentifier)) { 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return USPOOF_ASCII; 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This is a bit tricky. We look at a number of factors. 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The number of scripts in the text. 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc]) 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.) 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // time it is created, in setIdentifier(). 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t cardinalityPlus = fRequiredScripts->countMembers() + 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cardinalityPlus < 2) { 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return USPOOF_HIGHLY_RESTRICTIVE; 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts) 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org || containsWithAlternates(*KOREAN, *fRequiredScripts)) { 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return USPOOF_HIGHLY_RESTRICTIVE; 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cardinalityPlus == 2 && 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequiredScripts->test(USCRIPT_LATIN, status) && 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) { 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return USPOOF_MODERATELY_RESTRICTIVE; 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return USPOOF_MINIMALLY_RESTRICTIVE; 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* !UCONFIG_NO_NORMALIZATION */ 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t IdentifierInfo::getScriptCount() const { 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: Common and Inherited scripts were removed by setIdentifier(), and do not appear in fRequiredScripts. 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t count = fRequiredScripts->countMembers() + 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1); 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return count; 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const { 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!container.contains(containee)) { 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t iter = -1; ;) { 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UHashElement *hashEl = uhash_nextElement(fScriptSetSet, &iter); 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (hashEl == NULL) { 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ScriptSet *alternatives = static_cast<ScriptSet *>(hashEl->key.pointer); 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!container.intersects(*alternatives)) { 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return false; 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return true; 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) { 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVector sorted(status); 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t pos = -1; ;) { 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UHashElement *el = uhash_nextElement(alternates, &pos); 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (el == NULL) { 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ScriptSet *ss = static_cast<ScriptSet *>(el->key.pointer); 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sorted.addElement(ss, status); 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sorted.sort(uhash_compareScriptSet, status); 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString separator = UNICODE_STRING_SIMPLE("; "); 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t i=0; i<sorted.size(); i++) { 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i>0) { 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest.append(separator); 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i)); 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ss->displayScripts(dest); 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 319