/*
**********************************************************************
*   Copyright (C) 2012-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/
78393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
88393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utypes.h"
98393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uchar.h"
118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utf16.h"
128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "identifier_info.h"
148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "mutex.h"
158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "scriptset.h"
168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "ucln_in.h"
178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "uvector.h"
188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_NAMESPACE_BEGIN
208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic UMutex gInitMutex = U_MUTEX_INITIALIZER;
248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic UBool gStaticsAreInitialized = FALSE;
258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUnicodeSet *IdentifierInfo::ASCII;
278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusScriptSet *IdentifierInfo::JAPANESE;
288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusScriptSet *IdentifierInfo::CHINESE;
298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusScriptSet *IdentifierInfo::KOREAN;
308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN;
318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUBool IdentifierInfo::cleanup() {
338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete ASCII;
348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ASCII = NULL;
358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete JAPANESE;
368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    JAPANESE = NULL;
378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete CHINESE;
388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    CHINESE = NULL;
398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete KOREAN;
408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    KOREAN = NULL;
418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete CONFUSABLE_WITH_LATIN;
428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    CONFUSABLE_WITH_LATIN = NULL;
438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    gStaticsAreInitialized = FALSE;
448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return TRUE;
458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_CDECL_BEGIN
488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic UBool U_CALLCONV
498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo_cleanup(void) {
508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return IdentifierInfo::cleanup();
518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_CDECL_END
538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo::IdentifierInfo(UErrorCode &status):
568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL),
578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {
588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    {
628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        Mutex lock(&gInitMutex);
638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (!gStaticsAreInitialized) {
648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            ASCII    = new UnicodeSet(0, 0x7f);
658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            JAPANESE = new ScriptSet();
668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            CHINESE  = new ScriptSet();
678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            KOREAN   = new ScriptSet();
688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            CONFUSABLE_WITH_LATIN = new ScriptSet();
698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    || CONFUSABLE_WITH_LATIN == NULL) {
718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                status = U_MEMORY_ALLOCATION_ERROR;
728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                return;
738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            ASCII->freeze();
758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                     .set(USCRIPT_KATAKANA, status);
778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                      .set(USCRIPT_CHEROKEE, status);
818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            gStaticsAreInitialized = TRUE;
838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fIdentifier = new UnicodeString();
868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fRequiredScripts = new ScriptSet();
878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);
888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet);
898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fCommonAmongAlternates = new ScriptSet();
908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fNumerics = new UnicodeSet();
918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fIdentifierProfile = new UnicodeSet(0, 0x10FFFF);
928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL ||
948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                              fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) {
958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo::~IdentifierInfo() {
1008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fIdentifier;
1018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fRequiredScripts;
1028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_close(fScriptSetSet);
1038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fCommonAmongAlternates;
1048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fNumerics;
1058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fIdentifierProfile;
1068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::clear() {
1108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fRequiredScripts->resetAll();
1118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_removeAll(fScriptSetSet);
1128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fNumerics->clear();
1138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fCommonAmongAlternates->resetAll();
1148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return *this;
1158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) {
1198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    *fIdentifierProfile = identifierProfile;
1208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return *this;
1218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeSet &IdentifierInfo::getIdentifierProfile() const {
1258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return *fIdentifierProfile;
1268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {
1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
1318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return *this;
1328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    *fIdentifier = identifier;
1348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    clear();
1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ScriptSet scriptsForCP;
1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UChar32 cp;
1378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
1388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        cp = identifier.char32At(i);
1398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // Store a representative character for each kind of decimal digit
1408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
1418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
1428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            fNumerics->add(cp - (UChar32)u_getNumericValue(cp));
1438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UScriptCode extensions[500];
1458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, LENGTHOF(extensions), &status);
1468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (U_FAILURE(status)) {
1478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return *this;
1488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        scriptsForCP.resetAll();
1508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        for (int32_t j=0; j<extensionsCount; j++) {
1518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            scriptsForCP.set(extensions[j], status);
1528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        scriptsForCP.reset(USCRIPT_COMMON, status);
1548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        scriptsForCP.reset(USCRIPT_INHERITED, status);
1558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        switch (scriptsForCP.countMembers()) {
1568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius          case 0: break;
1578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius          case 1:
1588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // Single script, record it.
1598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            fRequiredScripts->Union(scriptsForCP);
1608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
1618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius          default:
1628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (!fRequiredScripts->intersects(scriptsForCP)
1638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    && !uhash_geti(fScriptSetSet, &scriptsForCP)) {
1648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // If the set hasn't been added already, add it
1658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                //    (Add a copy, fScriptSetSet takes ownership of the copy.)
1668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status);
1678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
1698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Now make a final pass through ScriptSetSet to remove alternates that came before singles.
1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // [Kana], [Kana Hira] => [Kana]
1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // This is relatively infrequent, so doesn't have to be optimized.
1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // We also compute any commonalities among the alternates.
1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (uhash_count(fScriptSetSet) > 0) {
1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        fCommonAmongAlternates->setAll();
1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        for (int32_t it = -1;;) {
1788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);
1798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (nextHashEl == NULL) {
1808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                break;
1818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);
1838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // [Kana], [Kana Hira] => [Kana]
1848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (fRequiredScripts->intersects(*next)) {
1858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                uhash_removeElement(fScriptSetSet, nextHashEl);
1868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            } else {
1878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                fCommonAmongAlternates->intersect(*next);
1888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
1898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                for (int32_t otherIt = -1;;) {
1908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);
1918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    if (otherHashEl == NULL) {
1928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        break;
1938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    }
1948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);
1958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    if (next != other && next->contains(*other)) {
1968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        uhash_removeElement(fScriptSetSet, nextHashEl);
1978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        break;
1988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    }
1998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                }
2008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
2018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (uhash_count(fScriptSetSet) == 0) {
2048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        fCommonAmongAlternates->resetAll();
2058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return *this;
2078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeString *IdentifierInfo::getIdentifier() const {
2118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fIdentifier;
2128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst ScriptSet *IdentifierInfo::getScripts() const {
2158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fRequiredScripts;
2168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UHashtable *IdentifierInfo::getAlternates() const {
2198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fScriptSetSet;
2208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeSet *IdentifierInfo::getNumerics() const {
2248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fNumerics;
2258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst ScriptSet *IdentifierInfo::getCommonAmongAlternates() const {
2288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fCommonAmongAlternates;
2298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#if !UCONFIG_NO_NORMALIZATION
2328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusURestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const {
2348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (!fIdentifierProfile->containsAll(*fIdentifier) || getNumerics()->size() > 1) {
2358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_UNRESTRICTIVE;
2368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (ASCII->containsAll(*fIdentifier)) {
2388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_ASCII;
2398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // This is a bit tricky. We look at a number of factors.
2418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // The number of scripts in the text.
2428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc])
2438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.)
2448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the
2468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //       time it is created, in setIdentifier().
2478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t cardinalityPlus = fRequiredScripts->countMembers() +
2488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
2498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (cardinalityPlus < 2) {
2508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_HIGHLY_RESTRICTIVE;
2518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts)
2538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            || containsWithAlternates(*KOREAN, *fRequiredScripts)) {
2548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_HIGHLY_RESTRICTIVE;
2558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (cardinalityPlus == 2 &&
2578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            fRequiredScripts->test(USCRIPT_LATIN, status) &&
2588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) {
2598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_MODERATELY_RESTRICTIVE;
2608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return USPOOF_MINIMALLY_RESTRICTIVE;
2628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif /* !UCONFIG_NO_NORMALIZATION */
2658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t IdentifierInfo::getScriptCount() const {
2678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Note: Common and Inherited scripts were removed by setIdentifier(), and do not appear in fRequiredScripts.
2688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t count = fRequiredScripts->countMembers() +
2698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
2708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return count;
2718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const {
2768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (!container.contains(containee)) {
2778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return FALSE;
2788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for (int32_t iter = -1; ;) {
2808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const UHashElement *hashEl = uhash_nextElement(fScriptSetSet, &iter);
2818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (hashEl == NULL) {
2828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
2838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptSet *alternatives = static_cast<ScriptSet *>(hashEl->key.pointer);
2858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (!container.intersects(*alternatives)) {
2868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return false;
2878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return true;
2908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) {
2938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UVector sorted(status);
2948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
2958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return dest;
2968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for (int32_t pos = -1; ;) {
2988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const UHashElement *el = uhash_nextElement(alternates, &pos);
2998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (el == NULL) {
3008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
3018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
3028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptSet *ss = static_cast<ScriptSet *>(el->key.pointer);
3038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        sorted.addElement(ss, status);
3048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
3058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    sorted.sort(uhash_compareScriptSet, status);
3068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UnicodeString separator = UNICODE_STRING_SIMPLE("; ");
3078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for (int32_t i=0; i<sorted.size(); i++) {
3088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (i>0) {
3098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            dest.append(separator);
3108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
3118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i));
3128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ss->displayScripts(dest);
3138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
3148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return dest;
3158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
3168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_NAMESPACE_END
3188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
319