18393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/*
28393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius**********************************************************************
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*   Copyright (C) 2012-2014, International Business Machines
48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius*   Corporation and others.  All Rights Reserved.
58393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius**********************************************************************
68393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius*/
78393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
88393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utypes.h"
98393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uchar.h"
118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utf16.h"
128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "identifier_info.h"
148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "mutex.h"
158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "scriptset.h"
168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "ucln_in.h"
178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "uvector.h"
188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_NAMESPACE_BEGIN
208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UnicodeSet *ASCII;
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic ScriptSet *JAPANESE;
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic ScriptSet *CHINESE;
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic ScriptSet *KOREAN;
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic ScriptSet *CONFUSABLE_WITH_LATIN;
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UInitOnce gIdentifierInfoInitOnce = U_INITONCE_INITIALIZER;
278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_BEGIN
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UBool U_CALLCONV
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusIdentifierInfo_cleanup(void) {
328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete ASCII;
338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ASCII = NULL;
348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete JAPANESE;
358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    JAPANESE = NULL;
368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete CHINESE;
378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    CHINESE = NULL;
388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete KOREAN;
398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    KOREAN = NULL;
408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete CONFUSABLE_WITH_LATIN;
418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    CONFUSABLE_WITH_LATIN = NULL;
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    gIdentifierInfoInitOnce.reset();
438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return TRUE;
448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic void U_CALLCONV
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusIdentifierInfo_init(UErrorCode &status) {
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ASCII    = new UnicodeSet(0, 0x7f);
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    JAPANESE = new ScriptSet();
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CHINESE  = new ScriptSet();
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    KOREAN   = new ScriptSet();
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CONFUSABLE_WITH_LATIN = new ScriptSet();
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (ASCII == NULL || JAPANESE == NULL || CHINESE == NULL || KOREAN == NULL
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            || CONFUSABLE_WITH_LATIN == NULL) {
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ASCII->freeze();
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    JAPANESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HIRAGANA, status)
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius             .set(USCRIPT_KATAKANA, status);
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CHINESE->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_BOPOMOFO, status);
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    KOREAN->set(USCRIPT_LATIN, status).set(USCRIPT_HAN, status).set(USCRIPT_HANGUL, status);
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CONFUSABLE_WITH_LATIN->set(USCRIPT_CYRILLIC, status).set(USCRIPT_GREEK, status)
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              .set(USCRIPT_CHEROKEE, status);
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ucln_i18n_registerCleanup(UCLN_I18N_IDENTIFIER_INFO, IdentifierInfo_cleanup);
668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_CDECL_END
688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo::IdentifierInfo(UErrorCode &status):
718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         fIdentifier(NULL), fRequiredScripts(NULL), fScriptSetSet(NULL),
728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         fCommonAmongAlternates(NULL), fNumerics(NULL), fIdentifierProfile(NULL) {
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    umtx_initOnce(gIdentifierInfoInitOnce, &IdentifierInfo_init, status);
748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fIdentifier = new UnicodeString();
798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fRequiredScripts = new ScriptSet();
808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fScriptSetSet = uhash_open(uhash_hashScriptSet, uhash_compareScriptSet, NULL, &status);
818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_setKeyDeleter(fScriptSetSet, uhash_deleteScriptSet);
828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fCommonAmongAlternates = new ScriptSet();
838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fNumerics = new UnicodeSet();
848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fIdentifierProfile = new UnicodeSet(0, 0x10FFFF);
858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_SUCCESS(status) && (fIdentifier == NULL || fRequiredScripts == NULL || fScriptSetSet == NULL ||
878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                              fCommonAmongAlternates == NULL || fNumerics == NULL || fIdentifierProfile == NULL)) {
888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo::~IdentifierInfo() {
938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fIdentifier;
948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fRequiredScripts;
958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_close(fScriptSetSet);
968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fCommonAmongAlternates;
978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fNumerics;
988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete fIdentifierProfile;
998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::clear() {
1038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fRequiredScripts->resetAll();
1048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_removeAll(fScriptSetSet);
1058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fNumerics->clear();
1068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    fCommonAmongAlternates->resetAll();
1078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return *this;
1088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::setIdentifierProfile(const UnicodeSet &identifierProfile) {
1128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    *fIdentifierProfile = identifierProfile;
1138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return *this;
1148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeSet &IdentifierInfo::getIdentifierProfile() const {
1188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return *fIdentifierProfile;
1198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {
1238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
1248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return *this;
1258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    *fIdentifier = identifier;
1278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    clear();
1288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ScriptSet scriptsForCP;
1298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UChar32 cp;
1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
1318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        cp = identifier.char32At(i);
1328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // Store a representative character for each kind of decimal digit
1338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
1348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            fNumerics->add(cp - (UChar32)u_getNumericValue(cp));
1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UScriptCode extensions[500];
138f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status);
1398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (U_FAILURE(status)) {
1408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return *this;
1418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        scriptsForCP.resetAll();
1438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        for (int32_t j=0; j<extensionsCount; j++) {
1448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            scriptsForCP.set(extensions[j], status);
1458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        scriptsForCP.reset(USCRIPT_COMMON, status);
1478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        scriptsForCP.reset(USCRIPT_INHERITED, status);
1488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        switch (scriptsForCP.countMembers()) {
1498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius          case 0: break;
1508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius          case 1:
1518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // Single script, record it.
1528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            fRequiredScripts->Union(scriptsForCP);
1538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
1548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius          default:
1558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (!fRequiredScripts->intersects(scriptsForCP)
1568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    && !uhash_geti(fScriptSetSet, &scriptsForCP)) {
1578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // If the set hasn't been added already, add it
1588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                //    (Add a copy, fScriptSetSet takes ownership of the copy.)
1598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status);
1608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
1628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Now make a final pass through ScriptSetSet to remove alternates that came before singles.
1658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // [Kana], [Kana Hira] => [Kana]
1668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // This is relatively infrequent, so doesn't have to be optimized.
1678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // We also compute any commonalities among the alternates.
1688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (uhash_count(fScriptSetSet) > 0) {
1698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        fCommonAmongAlternates->setAll();
1701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        for (int32_t it = UHASH_FIRST;;) {
1718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);
1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (nextHashEl == NULL) {
1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                break;
1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);
1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // [Kana], [Kana Hira] => [Kana]
1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (fRequiredScripts->intersects(*next)) {
1788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                uhash_removeElement(fScriptSetSet, nextHashEl);
1798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            } else {
1808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                fCommonAmongAlternates->intersect(*next);
1818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
1821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                for (int32_t otherIt = UHASH_FIRST;;) {
1838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);
1848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    if (otherHashEl == NULL) {
1858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        break;
1868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    }
1878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);
1888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    if (next != other && next->contains(*other)) {
1898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        uhash_removeElement(fScriptSetSet, nextHashEl);
1908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        break;
1918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    }
1928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                }
1938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
1948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (uhash_count(fScriptSetSet) == 0) {
1978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        fCommonAmongAlternates->resetAll();
1988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return *this;
2008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeString *IdentifierInfo::getIdentifier() const {
2048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fIdentifier;
2058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst ScriptSet *IdentifierInfo::getScripts() const {
2088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fRequiredScripts;
2098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UHashtable *IdentifierInfo::getAlternates() const {
2128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fScriptSetSet;
2138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeSet *IdentifierInfo::getNumerics() const {
2178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fNumerics;
2188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst ScriptSet *IdentifierInfo::getCommonAmongAlternates() const {
2218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return fCommonAmongAlternates;
2228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#if !UCONFIG_NO_NORMALIZATION
2258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusURestrictionLevel IdentifierInfo::getRestrictionLevel(UErrorCode &status) const {
2278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (!fIdentifierProfile->containsAll(*fIdentifier) || getNumerics()->size() > 1) {
2288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_UNRESTRICTIVE;
2298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (ASCII->containsAll(*fIdentifier)) {
2318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_ASCII;
2328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // This is a bit tricky. We look at a number of factors.
2348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // The number of scripts in the text.
2358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Plus 1 if there is some commonality among the alternates (eg [Arab Thaa]; [Arab Syrc])
2368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Plus number of alternates otherwise (this only works because we only test cardinality up to 2.)
2378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Note: the requiredScripts set omits COMMON and INHERITED; they are taken out at the
2398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    //       time it is created, in setIdentifier().
2408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t cardinalityPlus = fRequiredScripts->countMembers() +
2418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
2428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (cardinalityPlus < 2) {
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return USPOOF_SINGLE_SCRIPT_RESTRICTIVE;
2448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (containsWithAlternates(*JAPANESE, *fRequiredScripts) || containsWithAlternates(*CHINESE, *fRequiredScripts)
2468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            || containsWithAlternates(*KOREAN, *fRequiredScripts)) {
2478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_HIGHLY_RESTRICTIVE;
2488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (cardinalityPlus == 2 &&
2508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            fRequiredScripts->test(USCRIPT_LATIN, status) &&
2518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            !fRequiredScripts->intersects(*CONFUSABLE_WITH_LATIN)) {
2528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return USPOOF_MODERATELY_RESTRICTIVE;
2538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return USPOOF_MINIMALLY_RESTRICTIVE;
2558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif /* !UCONFIG_NO_NORMALIZATION */
2588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t IdentifierInfo::getScriptCount() const {
2608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Note: Common and Inherited scripts were removed by setIdentifier(), and do not appear in fRequiredScripts.
2618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t count = fRequiredScripts->countMembers() +
2628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            (fCommonAmongAlternates->countMembers() == 0 ? uhash_count(fScriptSetSet) : 1);
2638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return count;
2648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUBool IdentifierInfo::containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const {
2698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (!container.contains(containee)) {
2708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return FALSE;
2718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    for (int32_t iter = UHASH_FIRST; ;) {
2738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const UHashElement *hashEl = uhash_nextElement(fScriptSetSet, &iter);
2748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (hashEl == NULL) {
2758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
2768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptSet *alternatives = static_cast<ScriptSet *>(hashEl->key.pointer);
2788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (!container.intersects(*alternatives)) {
2798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return false;
2808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return true;
2838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUnicodeString &IdentifierInfo::displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status) {
2868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UVector sorted(status);
2878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
2888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return dest;
2898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    for (int32_t pos = UHASH_FIRST; ;) {
2918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        const UHashElement *el = uhash_nextElement(alternates, &pos);
2928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (el == NULL) {
2938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            break;
2948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptSet *ss = static_cast<ScriptSet *>(el->key.pointer);
2968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        sorted.addElement(ss, status);
2978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    sorted.sort(uhash_compareScriptSet, status);
2998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UnicodeString separator = UNICODE_STRING_SIMPLE("; ");
3008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for (int32_t i=0; i<sorted.size(); i++) {
3018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (i>0) {
3028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            dest.append(separator);
3038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
3048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ScriptSet *ss = static_cast<ScriptSet *>(sorted.elementAt(i));
3058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ss->displayScripts(dest);
3068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
3078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return dest;
3088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
3098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_NAMESPACE_END
3118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
312