16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*************************************************************************** 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2008-2013, International Business Machines Corporation 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* and others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*************************************************************************** 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: uspoof.cpp 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2008Feb13 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Andy Heninger 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Unicode Spoof Detection 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/normalizer2.h" 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uspoof.h" 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "identifier_info.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "mutex.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "scriptset.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucln_in.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uspoof_impl.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "umutex.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_USE 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Static Objects used by the spoof impl, their thread safe initialization and their cleanup. 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UnicodeSet *gInclusionSet = NULL; 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UnicodeSet *gRecommendedSet = NULL; 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const Normalizer2 *gNfdNormalizer = NULL; 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UMutex gInitMutex = U_MUTEX_INITIALIZER; 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_cleanup(void) { 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete gInclusionSet; 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gInclusionSet = NULL; 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete gRecommendedSet; 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gRecommendedSet = NULL; 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gNfdNormalizer = NULL; 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void initializeStatics() { 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Mutex m(&gInitMutex); 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (gInclusionSet == NULL) { 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gInclusionSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\ 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\-.\\u00B7\\u05F3\\u05F4\\u0F0B\\u200C\\u200D\\u2019]"), status); 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gRecommendedSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\ 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org [0-z\\u00C0-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\ 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B\\u021E\ 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u021F\\u0226-\\u0233\\u02BB\\u02BC\\u02EC\\u0300-\\u0304\ 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0306-\\u030C\\u030F-\\u0311\\u0313\\u0314\\u031B\\u0323-\ 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0328\\u032D\\u032E\\u0330\\u0331\\u0335\\u0338\\u0339\ 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0342-\\u0345\\u037B-\\u03CE\\u03FC-\\u045F\\u048A-\\u0525\ 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0531-\\u0586\\u05D0-\\u05F2\\u0621-\\u063F\\u0641-\\u0655\ 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0660-\\u0669\\u0670-\\u068D\\u068F-\\u06D5\\u06E5\\u06E6\ 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u06EE-\\u06FF\\u0750-\\u07B1\\u0901-\\u0939\\u093C-\\u094D\ 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0950\\u0960-\\u0972\\u0979-\\u0A4D\\u0A5C-\\u0A74\\u0A81-\ 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0B43\\u0B47-\\u0B61\\u0B66-\\u0C56\\u0C60\\u0C61\\u0C66-\ 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0CD6\\u0CE0-\\u0CEF\\u0D02-\\u0D28\\u0D2A-\\u0D39\\u0D3D-\ 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0D43\\u0D46-\\u0D4D\\u0D57-\\u0D61\\u0D66-\\u0D8E\\u0D91-\ 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0DA5\\u0DA7-\\u0DDE\\u0DF2\\u0E01-\\u0ED9\\u0F00\\u0F20-\ 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u0F8B\\u0F90-\\u109D\\u10D0-\\u10F0\\u10F7-\\u10FA\\u1200-\ 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u135A\\u135F\\u1380-\\u138F\\u1401-\\u167F\\u1780-\\u17A2\ 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CA\\u17D2\\u17D7-\ 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u17DC\\u17E0-\\u17E9\\u1810-\\u18A8\\u18AA-\\u18F5\\u1E00-\ 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u1E99\\u1F00-\\u1FFC\\u2D30-\\u2D65\\u2D80-\\u2DDE\\u3005-\ 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\u3007\\u3041-\\u31B7\\u3400-\\u9FCB\\uA000-\\uA48C\\uA67F\ 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\uA717-\\uA71F\\uA788\\uAA60-\\uAA7B\\uAC00-\\uD7A3\\uFA0E-\ 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\uFA29\\U00020000-\ 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org \\U0002B734]-[[:Cn:][:nfkcqc=n:][:XIDC=n:]]]"), status); 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gNfdNormalizer = Normalizer2::getNFDInstance(status); 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup); 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI USpoofChecker * U_EXPORT2 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_open(UErrorCode *status) { 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org initializeStatics(); 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *si = new SpoofImpl(SpoofData::getDefault(*status), *status); 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete si; 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org si = NULL; 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return reinterpret_cast<USpoofChecker *>(si); 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI USpoofChecker * U_EXPORT2 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org initializeStatics(); 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofData *sd = new SpoofData(data, length, *status); 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *si = new SpoofImpl(sd, *status); 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete sd; 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete si; 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sd == NULL || si == NULL) { 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_MEMORY_ALLOCATION_ERROR; 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete sd; 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete si; 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pActualLength != NULL) { 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pActualLength = sd->fRawData->fLength; 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return reinterpret_cast<USpoofChecker *>(si); 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI USpoofChecker * U_EXPORT2 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_clone(const USpoofChecker *sc, UErrorCode *status) { 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SpoofImpl *src = SpoofImpl::validateThis(sc, *status); 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (src == NULL) { 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *result = new SpoofImpl(*src, *status); // copy constructor 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete result; 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = NULL; 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return reinterpret_cast<USpoofChecker *>(result); 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_close(USpoofChecker *sc) { 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *This = SpoofImpl::validateThis(sc, status); 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete This; 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) { 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Verify that the requested checks are all ones (bits) that 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // are acceptable, known values. 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (checks & ~USPOOF_ALL_CHECKS) { 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->fChecks = checks; 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) { 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return This->fChecks; 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) { 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *This = SpoofImpl::validateThis(sc, status); 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This != NULL) { 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->fRestrictionLevel = restrictionLevel; 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI URestrictionLevel U_EXPORT2 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getRestrictionLevel(const USpoofChecker *sc) { 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SpoofImpl *This = SpoofImpl::validateThis(sc, status); 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return USPOOF_UNRESTRICTIVE; 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return This->fRestrictionLevel; 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) { 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->setAllowedLocales(localesList, *status); 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const char * U_EXPORT2 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) { 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return This->getAllowedLocales(*status); 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const USet * U_EXPORT2 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) { 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status); 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result->toUSet(); 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const UnicodeSet * U_EXPORT2 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) { 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return This->fAllowedCharsSet; 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) { 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *set = UnicodeSet::fromUSet(chars); 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uspoof_setAllowedUnicodeSet(sc, set, status); 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) { 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (chars->isBogus()) { 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone()); 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (clonedSet == NULL || clonedSet->isBogus()) { 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_MEMORY_ALLOCATION_ERROR; 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clonedSet->freeze(); 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete This->fAllowedCharsSet; 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->fAllowedCharsSet = clonedSet; 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->fChecks |= USPOOF_CHAR_LIMIT; 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_check(const USpoofChecker *sc, 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *id, int32_t length, 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *position, 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (length < -1) { 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString idStr((length == -1), id, length); // Aliasing constructor. 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t result = uspoof_checkUnicodeString(sc, idStr, position, status); 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_checkUTF8(const USpoofChecker *sc, 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *id, int32_t length, 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *position, 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id))); 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t result = uspoof_checkUnicodeString(sc, idStr, position, status); 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_areConfusable(const USpoofChecker *sc, 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *id1, int32_t length1, 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *id2, int32_t length2, 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl::validateThis(sc, *status); 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (length1 < -1 || length2 < -1) { 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString id1Str((length1==-1), id1, length1); // Aliasing constructor 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString id2Str((length2==-1), id2, length2); // Aliasing constructor 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status); 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_areConfusableUTF8(const USpoofChecker *sc, 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *id1, int32_t length1, 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *id2, int32_t length2, 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl::validateThis(sc, *status); 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (length1 < -1 || length2 < -1) { 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : uprv_strlen(id1))); 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : uprv_strlen(id2))); 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status); 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return results; 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_areConfusableUnicodeString(const USpoofChecker *sc, 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const icu::UnicodeString &id1, 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const icu::UnicodeString &id2, 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable, 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and for definitions of the types (single, whole, mixed-script) of confusables. 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We only care about a few of the check flags. Ignore the others. 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If no tests relavant to this function have been specified, return an error. 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: is this really the right thing to do? It's probably an error on the caller's part, 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // but logically we would just return 0 (no error). 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((This->fChecks & (USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org USPOOF_WHOLE_SCRIPT_CONFUSABLE)) == 0) { 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_INVALID_STATE_ERROR; 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t flagsForSkeleton = This->fChecks & USPOOF_ANY_CASE; 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t result = 0; 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org IdentifierInfo *identifierInfo = This->getIdentifierInfo(*status); 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org identifierInfo->setIdentifier(id1, *status); 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t id1ScriptCount = identifierInfo->getScriptCount(); 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org identifierInfo->setIdentifier(id2, *status); 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t id2ScriptCount = identifierInfo->getScriptCount(); 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->releaseIdentifierInfo(identifierInfo); 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org identifierInfo = NULL; 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) { 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString id1Skeleton; 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString id2Skeleton; 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (id1ScriptCount <= 1 && id2ScriptCount <= 1) { 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flagsForSkeleton |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id1, id1Skeleton, status); 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id2, id2Skeleton, status); 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (id1Skeleton == id2Skeleton) { 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (result & USPOOF_SINGLE_SCRIPT_CONFUSABLE) { 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the two inputs are single script confusable they cannot also be 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // mixed or whole script confusable, according to the UAX39 definitions. 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // So we can skip those tests. 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Two identifiers are whole script confusable if each is of a single script 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and they are mixed script confusable. 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool possiblyWholeScriptConfusables = 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org id1ScriptCount <= 1 && id2ScriptCount <= 1 && (This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE); 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Mixed Script Check 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) || possiblyWholeScriptConfusables ) { 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // For getSkeleton(), resetting the USPOOF_SINGLE_SCRIPT_CONFUSABLE flag will get us 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the mixed script table skeleton, which is what we want. 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The Any Case / Lower Case bit in the skelton flags was set at the top of the function. 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString id1Skeleton; 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString id2Skeleton; 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flagsForSkeleton &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE; 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id1, id1Skeleton, status); 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id2, id2Skeleton, status); 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (id1Skeleton == id2Skeleton) { 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (possiblyWholeScriptConfusables) { 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_checkUnicodeString(const USpoofChecker *sc, 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const icu::UnicodeString &id, 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *position, 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t result = 0; 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org IdentifierInfo *identifierInfo = NULL; 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) { 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org identifierInfo = This->getIdentifierInfo(*status); 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org identifierInfo->setIdentifier(id, *status); 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet); 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) { 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status); 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (idRestrictionLevel > This->fRestrictionLevel) { 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_RESTRICTION_LEVEL; 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This->fChecks & USPOOF_AUX_INFO) { 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= idRestrictionLevel; 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((This->fChecks) & USPOOF_MIXED_NUMBERS) { 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *numerics = identifierInfo->getNumerics(); 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (numerics->size() > 1) { 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_MIXED_NUMBERS; 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier. 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We have no easy way to do the same in C. 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // if (checkResult != null) { 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // checkResult.numerics = numerics; 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // } 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This->fChecks & (USPOOF_CHAR_LIMIT)) { 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length = id.length(); 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<length ;) { 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = id.char32At(i); 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i += U16_LENGTH(c); 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!This->fAllowedCharsSet->contains(c)) { 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_CHAR_LIMIT; 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This->fChecks & 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) { 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // These are the checks that need to be done on NFD input 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString nfdText; 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gNfdNormalizer->normalize(id, nfdText, *status); 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t nfdLength = nfdText.length(); 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This->fChecks & USPOOF_INVISIBLE) { 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // scan for more than one occurence of the same non-spacing mark 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in a sequence of non-spacing marks. 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 firstNonspacingMark = 0; 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool haveMultipleMarks = FALSE; 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence. 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<nfdLength ;) { 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = nfdText.char32At(i); 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i += U16_LENGTH(c); 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (u_charType(c) != U_NON_SPACING_MARK) { 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org firstNonspacingMark = 0; 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (haveMultipleMarks) { 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org marksSeenSoFar.clear(); 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org haveMultipleMarks = FALSE; 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (firstNonspacingMark == 0) { 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org firstNonspacingMark = c; 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!haveMultipleMarks) { 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org marksSeenSoFar.add(firstNonspacingMark); 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org haveMultipleMarks = TRUE; 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (marksSeenSoFar.contains(c)) { 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // report the error, and stop scanning. 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // No need to find more than the first failure. 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_INVISIBLE; 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org marksSeenSoFar.add(c); 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) { 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The basic test is the same for both whole and mixed script confusables. 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compute the set of scripts that every input character has a confusable in. 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // For this computation an input character is always considered to be 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // confusable with itself in its own script. 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the number of such scripts is two or more, and the input consisted of 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // characters all from a single script, we have a whole script confusable. 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (The two scripts will be the original script and the one that is confusable) 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the number of such scripts >= one, and the original input contained characters from 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // more than one script, we have a mixed script confusable. (We can transform 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // some of the characters, and end up with a visually similar string all in 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // one script.) 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (identifierInfo == NULL) { 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org identifierInfo = This->getIdentifierInfo(*status); 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto cleanupAndReturn; 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org identifierInfo->setIdentifier(id, *status); 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t scriptCount = identifierInfo->getScriptCount(); 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ScriptSet scripts; 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->wholeScriptCheck(nfdText, &scripts, *status); 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t confusableScriptCount = scripts.countMembers(); 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org //printf("confusableScriptCount = %d\n", confusableScriptCount); 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) && 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org confusableScriptCount >= 2 && 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org scriptCount == 1) { 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) && 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org confusableScriptCount >= 1 && 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org scriptCount > 1) { 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcleanupAndReturn: 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->releaseIdentifierInfo(identifierInfo); 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (position != NULL) { 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *position = 0; 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getSkeleton(const USpoofChecker *sc, 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t type, 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *id, int32_t length, 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl::validateThis(sc, *status); 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL)) { 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString idStr((length==-1), id, length); // Aliasing constructor 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString destStr; 6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status); 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destStr.extract(dest, destCapacity, *status); 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return destStr.length(); 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_I18N_API UnicodeString & U_EXPORT2 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getSkeletonUnicodeString(const USpoofChecker *sc, 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t type, 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString &id, 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString &dest, 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t tableMask = 0; 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (type) { 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0: 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tableMask = USPOOF_ML_TABLE_FLAG; 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case USPOOF_SINGLE_SCRIPT_CONFUSABLE: 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tableMask = USPOOF_SL_TABLE_FLAG; 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case USPOOF_ANY_CASE: 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tableMask = USPOOF_MA_TABLE_FLAG; 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE: 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tableMask = USPOOF_SA_TABLE_FLAG; 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString nfdId; 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gNfdNormalizer->normalize(id, nfdId, *status); 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Apply the skeleton mapping to the NFD normalized input string 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Accumulate the skeleton, possibly unnormalized, in a UnicodeString. 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t inputIndex = 0; 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString skelStr; 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t normalizedLen = nfdId.length(); 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (inputIndex=0; inputIndex < normalizedLen; ) { 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = nfdId.char32At(inputIndex); 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputIndex += U16_LENGTH(c); 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org This->confusableLookup(c, tableMask, skelStr); 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gNfdNormalizer->normalize(skelStr, dest, *status); 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getSkeletonUTF8(const USpoofChecker *sc, 6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t type, 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *id, int32_t length, 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *dest, int32_t destCapacity, 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *status) { 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl::validateThis(sc, *status); 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL)) { 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString srcStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : uprv_strlen(id))); 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString destStr; 6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status); 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(*status)) { 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t lengthInUTF8 = 0; 6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_strToUTF8(dest, destCapacity, &lengthInUTF8, 6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destStr.getBuffer(), destStr.length(), status); 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return lengthInUTF8; 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) { 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (This == NULL) { 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(U_FAILURE(*status)); 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t dataSize = This->fSpoofData->fRawData->fLength; 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (capacity < dataSize) { 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_BUFFER_OVERFLOW_ERROR; 7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dataSize; 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(buf, This->fSpoofData->fRawData, dataSize); 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dataSize; 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const USet * U_EXPORT2 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getInclusionSet(UErrorCode *) { 7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org initializeStatics(); 7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return gInclusionSet->toUSet(); 7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const USet * U_EXPORT2 7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getRecommendedSet(UErrorCode *) { 7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org initializeStatics(); 7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return gRecommendedSet->toUSet(); 7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_I18N_API const UnicodeSet * U_EXPORT2 7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getInclusionUnicodeSet(UErrorCode *) { 7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org initializeStatics(); 7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return gInclusionSet; 7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_I18N_API const UnicodeSet * U_EXPORT2 7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguspoof_getRecommendedUnicodeSet(UErrorCode *) { 7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org initializeStatics(); 7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return gRecommendedSet; 7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif // !UCONFIG_NO_NORMALIZATION 737