16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**********************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 2012-2013, International Business Machines
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Corporation and others.  All Rights Reserved.
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**********************************************************************
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h"
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "identifier_info.h"
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "mutex.h"
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "scriptset.h"
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucln_in.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Number of elements in a fixed-size array, as an int32_t.
// Only meaningful for true arrays (not pointers), since it is built on sizeof.
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

// gInitMutex is held by the IdentifierInfo constructor while it creates the shared
// static sets below. gStaticsAreInitialized is set to TRUE once they have been built,
// and back to FALSE by IdentifierInfo::cleanup().
static UMutex gInitMutex = U_MUTEX_INITIALIZER;
static UBool gStaticsAreInitialized = FALSE;

// Shared sets, created by the first IdentifierInfo constructor call and
// released by IdentifierInfo::cleanup():
//   ASCII                  code points 0..0x7f (frozen)
//   JAPANESE               Latin + Han + Hiragana + Katakana
//   CHINESE                Latin + Han + Bopomofo
//   KOREAN                 Latin + Han + Hangul
//   CONFUSABLE_WITH_LATIN  Cyrillic + Greek + Cherokee
UnicodeSet *IdentifierInfo::ASCII;
ScriptSet *IdentifierInfo::JAPANESE;
ScriptSet *IdentifierInfo::CHINESE;
ScriptSet *IdentifierInfo::KOREAN;
ScriptSet *IdentifierInfo::CONFUSABLE_WITH_LATIN;
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool IdentifierInfo::cleanup() {
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete ASCII;
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ASCII = NULL;
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete JAPANESE;
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    JAPANESE = NULL;
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete CHINESE;
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CHINESE = NULL;
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete KOREAN;
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    KOREAN = NULL;
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete CONFUSABLE_WITH_LATIN;
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CONFUSABLE_WITH_LATIN = NULL;
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    gStaticsAreInitialized = FALSE;
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Plain-function trampoline with U_CALLCONV linkage, so that the static member
// IdentifierInfo::cleanup() can be handed to ucln_i18n_registerCleanup().
U_CDECL_BEGIN
static UBool U_CALLCONV
IdentifierInfo_cleanup(void) {
    return IdentifierInfo::cleanup();
}
U_CDECL_END
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo::IdentifierInfo(UErrorCode &status):
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL),
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        Mutex lock(&gInitMutex);
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!gStaticsAreInitialized) {
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ASCII    = new UnicodeSet(0, 0x7f);
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            JAPANESE = new ScriptSet();
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            CHINESE  = new ScriptSet();
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            KOREAN   = new ScriptSet();
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            CONFUSABLE_WITH_LATIN = new ScriptSet();
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    || CONFUSABLE_WITH_LATIN == NULL) {
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                status = U_MEMORY_ALLOCATION_ERROR;
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return;
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ASCII->freeze();
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     .set(USCRIPT_KATAKANA, status);
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      .set(USCRIPT_CHEROKEE, status);
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            gStaticsAreInitialized = TRUE;
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fIdentifier = new UnicodeString();
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRequiredScripts = new ScriptSet();
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet);
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fCommonAmongAlternates = new ScriptSet();
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fNumerics = new UnicodeSet();
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fIdentifierProfile = new UnicodeSet(0, 0x10FFFF);
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL ||
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) {
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_MEMORY_ALLOCATION_ERROR;
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo::~IdentifierInfo() {
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fIdentifier;
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fRequiredScripts;
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uhash_close(fScriptSetSet);
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fCommonAmongAlternates;
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fNumerics;
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fIdentifierProfile;
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo &IdentifierInfo::clear() {
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fRequiredScripts->resetAll();
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uhash_removeAll(fScriptSetSet);
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fNumerics->clear();
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fCommonAmongAlternates->resetAll();
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Replaces the identifier profile with a copy of the given set.
// The profile is consulted by getRestrictionLevel(); analysis results already
// computed by setIdentifier() are not touched here.
// Returns *this to allow call chaining.
IdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) {
    *fIdentifierProfile = identifierProfile;
    return *this;
}
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Returns the current identifier profile (initially the full range 0..0x10FFFF,
// as set up by the constructor). The reference refers to storage owned by this object.
const UnicodeSet &IdentifierInfo::getIdentifierProfile() const {
    return *fIdentifierProfile;
}
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgIdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return *this;
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    *fIdentifier = identifier;
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    clear();
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ScriptSet scriptsForCP;
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 cp;
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cp = identifier.char32At(i);
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Store a representative character for each kind of decimal digit
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fNumerics->add(cp - (UChar32)u_getNumericValue(cp));
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UScriptCode extensions[500];
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, LENGTHOF(extensions), &status);
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (U_FAILURE(status)) {
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return *this;
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        scriptsForCP.resetAll();
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (int32_t j=0; j<extensionsCount; j++) {
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            scriptsForCP.set(extensions[j], status);
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        scriptsForCP.reset(USCRIPT_COMMON, status);
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        scriptsForCP.reset(USCRIPT_INHERITED, status);
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        switch (scriptsForCP.countMembers()) {
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          case 0: break;
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          case 1:
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Single script, record it.
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRequiredScripts->Union(scriptsForCP);
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          default:
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (!fRequiredScripts->intersects(scriptsForCP)
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    && !uhash_geti(fScriptSetSet, &scriptsForCP)) {
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If the set hasn't been added already, add it
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                //    (Add a copy, fScriptSetSet takes ownership of the copy.)
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status);
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Now make a final pass through ScriptSetSet to remove alternates that came before singles.
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // [Kana], [Kana Hira] => [Kana]
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // This is relatively infrequent, so doesn't have to be optimized.
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // We also compute any commonalities among the alternates.
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (uhash_count(fScriptSetSet) > 0) {
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fCommonAmongAlternates->setAll();
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for (int32_t it = -1;;) {
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (nextHashEl == NULL) {
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // [Kana], [Kana Hira] => [Kana]
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (fRequiredScripts->intersects(*next)) {
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uhash_removeElement(fScriptSetSet, nextHashEl);
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fCommonAmongAlternates->intersect(*next);
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (int32_t otherIt = -1;;) {
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (otherHashEl == NULL) {
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (next != other && next->contains(*other)) {
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        uhash_removeElement(fScriptSetSet, nextHashEl);
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (uhash_count(fScriptSetSet) == 0) {
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fCommonAmongAlternates->resetAll();
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *this;
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Returns the internal copy of the identifier last passed to setIdentifier()
// (an empty string if none was set). The pointer refers to storage owned by this object.
const UnicodeString *IdentifierInfo::getIdentifier() const {
    return fIdentifier;
}
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Returns the set of required scripts computed by setIdentifier(): the scripts of
// code points that have exactly one script, excluding Common and Inherited.
// The pointer refers to storage owned by this object.
const ScriptSet *IdentifierInfo::getScripts() const {
    return fRequiredScripts;
}
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Returns the hash table whose keys are the alternate ScriptSets left after the
// reduction pass in setIdentifier(). The table and its keys are owned by this object.
const UHashtable *IdentifierInfo::getAlternates() const {
    return fScriptSetSet;
}
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Returns the set of representative zero digits, one per decimal-digit set
// found in the identifier by setIdentifier(). The pointer refers to storage
// owned by this object.
const UnicodeSet *IdentifierInfo::getNumerics() const {
    return fNumerics;
}
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Returns the scripts shared by every remaining alternate script set, as computed
// by setIdentifier(); empty when there are no alternates. The pointer refers to
// storage owned by this object.
const ScriptSet *IdentifierInfo::getCommonAmongAlternates() const {
    return fCommonAmongAlternates;
}
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgURestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const {
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!fIdentifierProfile->containsAll(*fIdentifier) || getNumerics()->size() > 1) {
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return USPOOF_UNRESTRICTIVE;
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (ASCII->containsAll(*fIdentifier)) {
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return USPOOF_ASCII;
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // This is a bit tricky. We look at a number of factors.
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The number of scripts in the text.
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc])
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.)
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //       time it is created, in setIdentifier().
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t cardinalityPlus = fRequiredScripts->countMembers() +
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (cardinalityPlus < 2) {
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return USPOOF_HIGHLY_RESTRICTIVE;
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts)
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            || containsWithAlternates(*KOREAN, *fRequiredScripts)) {
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return USPOOF_HIGHLY_RESTRICTIVE;
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (cardinalityPlus == 2 &&
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fRequiredScripts->test(USCRIPT_LATIN, status) &&
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) {
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return USPOOF_MODERATELY_RESTRICTIVE;
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return USPOOF_MINIMALLY_RESTRICTIVE;
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* !UCONFIG_NO_NORMALIZATION */
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t IdentifierInfo::getScriptCount() const {
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Note: Common and Inherited scripts were removed by setIdentifier(), and do not appear in fRequiredScripts.
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t count = fRequiredScripts->countMembers() +
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return count;
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const {
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!container.contains(containee)) {
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (int32_t iter = -1; ;) {
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UHashElement *hashEl = uhash_nextElement(fScriptSetSet, &iter);
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (hashEl == NULL) {
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ScriptSet *alternatives = static_cast<ScriptSet *>(hashEl->key.pointer);
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!container.intersects(*alternatives)) {
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return false;
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return true;
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) {
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector sorted(status);
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return dest;
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (int32_t pos = -1; ;) {
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UHashElement *el = uhash_nextElement(alternates, &pos);
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (el == NULL) {
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ScriptSet *ss = static_cast<ScriptSet *>(el->key.pointer);
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        sorted.addElement(ss, status);
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sorted.sort(uhash_compareScriptSet, status);
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString separator = UNICODE_STRING_SIMPLE("; ");
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (int32_t i=0; i<sorted.size(); i++) {
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (i>0) {
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dest.append(separator);
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i));
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ss->displayScripts(dest);
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return dest;
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
319