1b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* 2b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*************************************************************************** 3b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* Copyright (C) 2008-2009, International Business Machines Corporation 4b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* and others. All Rights Reserved. 5b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*************************************************************************** 6b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* file name: uspoof.cpp 7b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* encoding: US-ASCII 8b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* tab size: 8 (not used) 9b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* indentation:4 10b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* 11b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* created on: 2008Feb13 12b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* created by: Andy Heninger 13b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* 14b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* Unicode Spoof Detection 15b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*/ 16b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/utypes.h" 17b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uspoof.h" 18b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/unorm.h" 19b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/ustring.h" 20b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "cmemory.h" 21b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "uspoof_impl.h" 22b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "uassert.h" 23b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 24b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 25b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION 26b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 27b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 28b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include <stdio.h> // debug 29b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 30b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_NAMESPACE_USE 31b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 32b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 33b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI USpoofChecker * U_EXPORT2 34b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_open(UErrorCode *status) { 35b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 36b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 37b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 38b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *si = new SpoofImpl(SpoofData::getDefault(*status), *status); 39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete si; 41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru si = NULL; 42b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 43b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return (USpoofChecker *)si; 44b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI USpoofChecker * U_EXPORT2 48b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, 49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 50b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 51b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 52b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 53b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofData *sd = new SpoofData(data, length, *status); 54b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *si = new SpoofImpl(sd, *status); 55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 56b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete sd; 57b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete si; 58b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (sd == NULL || si == NULL) { 61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete sd; 63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete si; 64b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 65b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (pActualLength != NULL) { 68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pActualLength = sd->fRawData->fLength; 69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return reinterpret_cast<USpoofChecker *>(si); 71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI USpoofChecker * U_EXPORT2 75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_clone(const USpoofChecker *sc, UErrorCode *status) { 76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const SpoofImpl *src = SpoofImpl::validateThis(sc, *status); 77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (src == NULL) { 78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *result = new SpoofImpl(*src, *status); // copy constructor 81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete result; 83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result = NULL; 84b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return (USpoofChecker *)result; 86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI void U_EXPORT2 90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_close(USpoofChecker *sc) { 91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *This = SpoofImpl::validateThis(sc, status); 93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete This; 94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI void U_EXPORT2 98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) { 99b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Verify that the requested checks are all ones (bits) that 105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // are acceptable, known values. 106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (checks & ~USPOOF_ALL_CHECKS) { 107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 108b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru This->fChecks = checks; 112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) { 117b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 121b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return This->fChecks; 122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI void U_EXPORT2 125b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) { 126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 127b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 129b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 130b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru This->setAllowedLocales(localesList, *status); 131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 132b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 133b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI const char * U_EXPORT2 134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) { 135b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 136b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return This->getAllowedLocales(*status); 140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI const USet * U_EXPORT2 144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) { 145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status); 146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return reinterpret_cast<const USet *>(result); 147b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI const UnicodeSet * U_EXPORT2 150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) { 151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return This->fAllowedCharsSet; 156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI void U_EXPORT2 160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) { 161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeSet *set = reinterpret_cast<const UnicodeSet *>(chars); 162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uspoof_setAllowedUnicodeSet(sc, set, status); 163b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI void U_EXPORT2 167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) { 168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 171b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (chars->isBogus()) { 173b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 174b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone()); 177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (clonedSet == NULL || clonedSet->isBogus()) { 178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru clonedSet->freeze(); 182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete This->fAllowedCharsSet; 183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru This->fAllowedCharsSet = clonedSet; 184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru This->fChecks |= USPOOF_CHAR_LIMIT; 185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_check(const USpoofChecker *sc, 190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *text, int32_t length, 191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *position, 192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 198b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < -1) { 199b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length == -1) { 203b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // It's not worth the bother to handle nul terminated strings everywhere. 204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Just get the length and be done with it. 205b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length = u_strlen(text); 206b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t result = 0; 209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t failPos = 0x7fffffff; // TODO: do we have a #define for max int32? 210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // A count of the number of non-Common or inherited scripts. 212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Needed for both the SINGLE_SCRIPT and the WHOLE/MIXED_SCIRPT_CONFUSABLE tests. 213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Share the computation when possible. scriptCount == -1 means that we haven't 214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // done it yet. 215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t scriptCount = -1; 216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((This->fChecks) & USPOOF_SINGLE_SCRIPT) { 218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru scriptCount = This->scriptScan(text, length, failPos, *status); 219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // printf("scriptCount (clipped to 2) = %d\n", scriptCount); 220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( scriptCount >= 2) { 221b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Note: scriptCount == 2 covers all cases of the number of scripts >= 2 222b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result |= USPOOF_SINGLE_SCRIPT; 223b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This->fChecks & USPOOF_CHAR_LIMIT) { 227b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i; 228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c; 229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i=0; i<length ;) { 230b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(text, i, length, c); 231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!This->fAllowedCharsSet->contains(c)) { 232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result |= USPOOF_CHAR_LIMIT; 233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (i < failPos) { 234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru failPos = i; 235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 237b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 238b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 239b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 241b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This->fChecks & 242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) { 243b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // These are the checks that need to be done on NFKD input 244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru NFKDBuffer normalizedInput(text, length, *status); 245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *nfkdText = normalizedInput.getBuffer(); 246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t nfkdLength = normalizedInput.getLength(); 247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This->fChecks & USPOOF_INVISIBLE) { 249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // scan for more than one occurence of the same non-spacing mark 251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // in a sequence of non-spacing marks. 252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i; 253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c; 254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 firstNonspacingMark = 0; 255b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool haveMultipleMarks = FALSE; 256b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence. 257b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 258b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i=0; i<length ;) { 259b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(nfkdText, i, nfkdLength, c); 260b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (u_charType(c) != U_NON_SPACING_MARK) { 261b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru firstNonspacingMark = 0; 262b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (haveMultipleMarks) { 263b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru marksSeenSoFar.clear(); 264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru haveMultipleMarks = FALSE; 265b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 266b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 268b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (firstNonspacingMark == 0) { 269b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru firstNonspacingMark = c; 270b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; 271b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 272b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!haveMultipleMarks) { 273b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru marksSeenSoFar.add(firstNonspacingMark); 274b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru haveMultipleMarks = TRUE; 275b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 276b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (marksSeenSoFar.contains(c)) { 277b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // report the error, and stop scanning. 278b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // No need to find more than the first failure. 279b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result |= USPOOF_INVISIBLE; 280b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru failPos = i; 281b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 282b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 283b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru marksSeenSoFar.add(c); 284b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 285b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 286b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 287b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 288b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) { 289b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The basic test is the same for both whole and mixed script confusables. 290b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Compute the set of scripts that every input character has a confusable in. 291b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // For this computation an input character is always considered to be 292b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // confusable with itself in its own script. 293b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If the number of such scripts is two or more, and the input consisted of 294b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // characters all from a single script, we have a whole script confusable. 295b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // (The two scripts will be the original script and the one that is confusable) 296b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If the number of such scripts >= one, and the original input contained characters from 297b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // more than one script, we have a mixed script confusable. (We can transform 298b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // some of the characters, and end up with a visually similar string all in 299b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // one script.) 300b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 301b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (scriptCount == -1) { 302b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t t; 303b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru scriptCount = This->scriptScan(text, length, t, *status); 304b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 305b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 306b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ScriptSet scripts; 307b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru This->wholeScriptCheck(nfkdText, nfkdLength, &scripts, *status); 308b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t confusableScriptCount = scripts.countMembers(); 309b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru //printf("confusableScriptCount = %d\n", confusableScriptCount); 310b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 311b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) && 312b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru confusableScriptCount >= 2 && 313b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru scriptCount == 1) { 314b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 315b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 316b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 317b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) && 318b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru confusableScriptCount >= 1 && 319b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru scriptCount > 1) { 320b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 321b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 322b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 323b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 324b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (position != NULL && failPos != 0x7fffffff) { 325b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *position = failPos; 326b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 327b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return result; 328b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 329b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 330b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 331b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 332b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_checkUTF8(const USpoofChecker *sc, 333b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *text, int32_t length, 334b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *position, 335b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 336b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 337b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 338b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 339b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 340b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar stackBuf[USPOOF_STACK_BUFFER_SIZE]; 341b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar* text16 = stackBuf; 342b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t len16; 343b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 344b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_strFromUTF8(text16, USPOOF_STACK_BUFFER_SIZE, &len16, text, length, status); 345b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) { 346b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 347b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 348b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR) { 349b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru text16 = static_cast<UChar *>(uprv_malloc(len16 * sizeof(UChar) + 2)); 350b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (text16 == NULL) { 351b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 352b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 353b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 354b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ZERO_ERROR; 355b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_strFromUTF8(text16, len16+1, NULL, text, length, status); 356b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 357b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 358b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t position16 = -1; 359b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t result = uspoof_check(sc, text16, len16, &position16, status); 360b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 361b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 362b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 363b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 364b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (position16 > 0) { 365b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Translate a UTF-16 based error position back to a UTF-8 offset. 366b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // u_strToUTF8() in preflight mode is an easy way to do it. 367b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(position16 <= len16); 368b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_strToUTF8(NULL, 0, position, text16, position16, status); 369b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (position > 0) { 370b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // position is the required buffer length from u_strToUTF8, which includes 371b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // space for a terminating NULL, which we don't want, hence the -1. 372b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *position -= 1; 373b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 374b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ZERO_ERROR; // u_strToUTF8, above sets BUFFER_OVERFLOW_ERROR. 375b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 376b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 377b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (text16 != stackBuf) { 378b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(text16); 379b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 380b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return result; 381b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 382b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 383b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 384b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* A convenience wrapper around the public uspoof_getSkeleton that handles 385b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru * allocating a larger buffer than provided if the original is too small. 386b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru */ 387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic UChar *getSkeleton(const USpoofChecker *sc, uint32_t type, const UChar *s, int32_t inputLength, 388b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *dest, int32_t destCapacity, int32_t *outputLength, UErrorCode *status) { 389b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t requiredCapacity = 0; 390b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *buf = dest; 391b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 392b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 393b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 394b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 395b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru requiredCapacity = uspoof_getSkeleton(sc, type, s, inputLength, dest, destCapacity, status); 396b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR) { 397b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru buf = static_cast<UChar *>(uprv_malloc(requiredCapacity * sizeof(UChar))); 398b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (buf == NULL) { 399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 400b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 401b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 402b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ZERO_ERROR; 403b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uspoof_getSkeleton(sc, type, s, inputLength, buf, requiredCapacity, status); 404b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 405b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *outputLength = requiredCapacity; 406b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return buf; 407b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 408b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 409b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 410b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 411b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_areConfusable(const USpoofChecker *sc, 412b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *s1, int32_t length1, 413b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *s2, int32_t length2, 414b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 415b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 416b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 417b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 418b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 419b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 420b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable, 421b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // and for definitions of the types (single, whole, mixed-script) of confusables. 422b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 423b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // We only care about a few of the check flags. Ignore the others. 424b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If no tests relavant to this function have been specified, return an error. 425b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // TODO: is this really the right thing to do? It's probably an error on the caller's part, 426b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // but logically we would just return 0 (no error). 427b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((This->fChecks & (USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | 428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru USPOOF_WHOLE_SCRIPT_CONFUSABLE)) == 0) { 429b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_INVALID_STATE_ERROR; 430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t flagsForSkeleton = This->fChecks & USPOOF_ANY_CASE; 433b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar s1SkeletonBuf[USPOOF_STACK_BUFFER_SIZE]; 434b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *s1Skeleton; 435b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t s1SkeletonLength = 0; 436b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 437b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar s2SkeletonBuf[USPOOF_STACK_BUFFER_SIZE]; 438b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *s2Skeleton; 439b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t s2SkeletonLength = 0; 440b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 441b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t result = 0; 442b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t t; 443b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t s1ScriptCount = This->scriptScan(s1, length1, t, *status); 444b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t s2ScriptCount = This->scriptScan(s2, length2, t, *status); 445b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 446b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) { 447b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Do the Single Script compare. 448b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s1ScriptCount <= 1 && s2ScriptCount <= 1) { 449b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru flagsForSkeleton |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 450b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s1Skeleton = getSkeleton(sc, flagsForSkeleton, s1, length1, s1SkeletonBuf, 451b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sizeof(s1SkeletonBuf)/sizeof(UChar), &s1SkeletonLength, status); 452b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s2Skeleton = getSkeleton(sc, flagsForSkeleton, s2, length2, s2SkeletonBuf, 453b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sizeof(s2SkeletonBuf)/sizeof(UChar), &s2SkeletonLength, status); 454b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s1SkeletonLength == s2SkeletonLength && u_strncmp(s1Skeleton, s2Skeleton, s1SkeletonLength) == 0) { 455b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s1Skeleton != s1SkeletonBuf) { 458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(s1Skeleton); 459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s2Skeleton != s2SkeletonBuf) { 461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(s2Skeleton); 462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (result & USPOOF_SINGLE_SCRIPT_CONFUSABLE) { 467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If the two inputs are single script confusable they cannot also be 468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // mixed or whole script confusable, according to the UAX39 definitions. 469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // So we can skip those tests. 470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return result; 471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Optimization for whole script confusables test: two identifiers are whole script confusable if 474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // each is of a single script and they are mixed script confusable. 475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UBool possiblyWholeScriptConfusables = 476b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s1ScriptCount <= 1 && s2ScriptCount <= 1 && (This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE); 477b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 478b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 479b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Mixed Script Check 480b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 481b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) || possiblyWholeScriptConfusables ) { 482b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // For getSkeleton(), resetting the USPOOF_SINGLE_SCRIPT_CONFUSABLE flag will get us 483b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // the mixed script table skeleton, which is what we want. 484b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The Any Case / Lower Case bit in the skelton flags was set at the top of the function. 485b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru flagsForSkeleton &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE; 486b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s1Skeleton = getSkeleton(sc, flagsForSkeleton, s1, length1, s1SkeletonBuf, 487b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sizeof(s1SkeletonBuf)/sizeof(UChar), &s1SkeletonLength, status); 488b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s2Skeleton = getSkeleton(sc, flagsForSkeleton, s2, length2, s2SkeletonBuf, 489b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sizeof(s2SkeletonBuf)/sizeof(UChar), &s2SkeletonLength, status); 490b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s1SkeletonLength == s2SkeletonLength && u_strncmp(s1Skeleton, s2Skeleton, s1SkeletonLength) == 0) { 491b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 492b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (possiblyWholeScriptConfusables) { 493b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 494b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 495b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 496b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s1Skeleton != s1SkeletonBuf) { 497b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(s1Skeleton); 498b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 499b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s2Skeleton != s2SkeletonBuf) { 500b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(s2Skeleton); 501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return result; 505b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 508b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Convenience function for converting a UTF-8 input to a UChar * string, including 509b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// reallocating a buffer when required. Parameters and their interpretation mostly 510b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// match u_strFromUTF8. 511b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querustatic UChar * convertFromUTF8(UChar *outBuf, int32_t outBufCapacity, int32_t *outputLength, 513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *in, int32_t inLength, UErrorCode *status) { 514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *dest = outBuf; 518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_strFromUTF8(dest, outBufCapacity, outputLength, in, inLength, status); 519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR) { 520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru dest = static_cast<UChar *>(uprv_malloc(*outputLength * sizeof(UChar))); 521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (dest == NULL) { 522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ZERO_ERROR; 526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_strFromUTF8(dest, *outputLength, NULL, in, inLength, status); 527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 528b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return dest; 529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 530b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 533b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 534b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_areConfusableUTF8(const USpoofChecker *sc, 535b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *s1, int32_t length1, 536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *s2, int32_t length2, 537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl::validateThis(sc, *status); 540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar s1Buf[USPOOF_STACK_BUFFER_SIZE]; 545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t lengthS1U; 546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *s1U = convertFromUTF8(s1Buf, USPOOF_STACK_BUFFER_SIZE, &lengthS1U, s1, length1, status); 547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar s2Buf[USPOOF_STACK_BUFFER_SIZE]; 549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t lengthS2U; 550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *s2U = convertFromUTF8(s2Buf, USPOOF_STACK_BUFFER_SIZE, &lengthS2U, s2, length2, status); 551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 552b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t results = uspoof_areConfusable(sc, s1U, lengthS1U, s2U, lengthS2U, status); 553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 554b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s1U != s1Buf) { 555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(s1U); 556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (s2U != s2Buf) { 558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(s2U); 559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return results; 561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_areConfusableUnicodeString(const USpoofChecker *sc, 566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const U_NAMESPACE_QUALIFIER UnicodeString &s1, 567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const U_NAMESPACE_QUALIFIER UnicodeString &s2, 568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 569b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 570b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *u1 = s1.getBuffer(); 571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t length1 = s1.length(); 572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *u2 = s2.getBuffer(); 573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t length2 = s2.length(); 574b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t results = uspoof_areConfusable(sc, u1, length1, u2, length2, status); 576b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return results; 577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_checkUnicodeString(const USpoofChecker *sc, 584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const U_NAMESPACE_QUALIFIER UnicodeString &text, 585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *position, 586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t result = uspoof_check(sc, text.getBuffer(), text.length(), position, status); 588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return result; 589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 591b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 592b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 593b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getSkeleton(const USpoofChecker *sc, 594b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t type, 595b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *s, int32_t length, 596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *dest, int32_t destCapacity, 597b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 598b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 599b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // TODO: this function could be sped up a bit 600b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Skip the input normalization when not needed, work from callers data. 601b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Put the initial skeleton straight into the caller's destination buffer. 602b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // It probably won't need normalization. 603b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // But these would make the structure more complicated. 604b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 605b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 606b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 607b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 608b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 609b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=NULL) || 610b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (type & ~(USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE)) != 0) { 611b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 613b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 615b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t tableMask = 0; 616b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru switch (type) { 617b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case 0: 618b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tableMask = USPOOF_ML_TABLE_FLAG; 619b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 620b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case USPOOF_SINGLE_SCRIPT_CONFUSABLE: 621b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tableMask = USPOOF_SL_TABLE_FLAG; 622b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 623b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case USPOOF_ANY_CASE: 624b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tableMask = USPOOF_MA_TABLE_FLAG; 625b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 626b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE: 627b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tableMask = USPOOF_SA_TABLE_FLAG; 628b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 629b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru default: 630b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 631b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 632b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 633b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 634b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // NFKD transform of the user supplied input 635b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 636b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar nfkdStackBuf[USPOOF_STACK_BUFFER_SIZE]; 637b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *nfkdInput = nfkdStackBuf; 638b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t normalizedLen = unorm_normalize( 639b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s, length, UNORM_NFKD, 0, nfkdInput, USPOOF_STACK_BUFFER_SIZE, status); 640b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR) { 641b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nfkdInput = (UChar *)uprv_malloc((normalizedLen+1)*sizeof(UChar)); 642b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (nfkdInput == NULL) { 643b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 644b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 645b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 646b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ZERO_ERROR; 647b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru normalizedLen = unorm_normalize(s, length, UNORM_NFKD, 0, 648b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru nfkdInput, normalizedLen+1, status); 649b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 650b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 651b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (nfkdInput != nfkdStackBuf) { 652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(nfkdInput); 653b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 654b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 655b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 656b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 657b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // buffer to hold the Unicode defined skeleton mappings for a single code point 658b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar buf[USPOOF_MAX_SKELETON_EXPANSION]; 659b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 660b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Apply the skeleton mapping to the NFKD normalized input string 661b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Accumulate the skeleton, possibly unnormalized, in a UnicodeString. 662b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t inputIndex = 0; 663b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString skelStr; 664b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while (inputIndex < normalizedLen) { 665b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c; 666b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(nfkdInput, inputIndex, normalizedLen, c); 667b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t replaceLen = This->confusableLookup(c, tableMask, buf); 668b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru skelStr.append(buf, replaceLen); 669b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 670b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 671b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (nfkdInput != nfkdStackBuf) { 672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(nfkdInput); 673b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 674b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 675b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *result = skelStr.getBuffer(); 676b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t resultLen = skelStr.length(); 677b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *normedResult = NULL; 678b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 679b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Check the skeleton for NFKD, normalize it if needed. 680b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Unnormalized results should be very rare. 681b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!unorm_isNormalized(result, resultLen, UNORM_NFKD, status)) { 68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalizedLen = unorm_normalize(result, resultLen, UNORM_NFKD, 0, NULL, 0, status); 68350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normedResult = static_cast<UChar *>(uprv_malloc((normalizedLen+1)*sizeof(UChar))); 684b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (normedResult == NULL) { 685b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 686b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 687b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 68850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_ZERO_ERROR; 689b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unorm_normalize(result, resultLen, UNORM_NFKD, 0, normedResult, normalizedLen+1, status); 690b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result = normedResult; 691b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru resultLen = normalizedLen; 692b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 693b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 694b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Copy the skeleton to the caller's buffer 695b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_SUCCESS(*status)) { 696b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (destCapacity == 0 || resultLen > destCapacity) { 697b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = resultLen>destCapacity ? U_BUFFER_OVERFLOW_ERROR : U_STRING_NOT_TERMINATED_WARNING; 698b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 699b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_memcpy(dest, result, resultLen); 700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (destCapacity > resultLen) { 701b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru dest[resultLen] = 0; 702b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 703b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_STRING_NOT_TERMINATED_WARNING; 704b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 705b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 707b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(normedResult); 708b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return resultLen; 709b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 710b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 711b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 712b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 713b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UnicodeString & U_EXPORT2 714b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getSkeletonUnicodeString(const USpoofChecker *sc, 715b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t type, 716b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeString &s, 717b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString &dest, 718b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 719b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 720b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return dest; 721b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 722b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru dest.remove(); 723b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 724b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *str = s.getBuffer(); 725b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t strLen = s.length(); 726b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar smallBuf[USPOOF_STACK_BUFFER_SIZE]; 727b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *buf = smallBuf; 728b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t outputSize = uspoof_getSkeleton(sc, type, str, strLen, smallBuf, USPOOF_STACK_BUFFER_SIZE, status); 729b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR) { 730b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru buf = static_cast<UChar *>(uprv_malloc((outputSize+1)*sizeof(UChar))); 731b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (buf == NULL) { 732b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 733b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return dest; 734b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 735b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ZERO_ERROR; 736b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uspoof_getSkeleton(sc, type, str, strLen, buf, outputSize+1, status); 737b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 738b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_SUCCESS(*status)) { 739b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru dest.setTo(buf, outputSize); 740b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 741b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 742b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (buf != smallBuf) { 743b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(buf); 744b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 745b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return dest; 746b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 747b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 748b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 749b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 750b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_getSkeletonUTF8(const USpoofChecker *sc, 751b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t type, 752b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *s, int32_t length, 753b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru char *dest, int32_t destCapacity, 754b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 755b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Lacking a UTF-8 normalization API, just converting the input to 756b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // UTF-16 seems as good an approach as any. In typical use, input will 757b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // be an identifier, which is to say not too long for stack buffers. 758b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 760b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 761b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Buffers for the UChar form of the input and skeleton strings. 762b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar smallInBuf[USPOOF_STACK_BUFFER_SIZE]; 763b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *inBuf = smallInBuf; 764b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar smallOutBuf[USPOOF_STACK_BUFFER_SIZE]; 765b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *outBuf = smallOutBuf; 766b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 767b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t lengthInUChars = 0; 768b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t skelLengthInUChars = 0; 769b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t skelLengthInUTF8 = 0; 770b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 771b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_strFromUTF8(inBuf, USPOOF_STACK_BUFFER_SIZE, &lengthInUChars, 772b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s, length, status); 773b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR) { 774b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru inBuf = static_cast<UChar *>(uprv_malloc((lengthInUChars+1)*sizeof(UChar))); 775b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (inBuf == NULL) { 776b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 777b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto cleanup; 778b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 779b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_ZERO_ERROR; 78050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_strFromUTF8(inBuf, lengthInUChars+1, &lengthInUChars, 781b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru s, length, status); 782b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 783b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 78450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho skelLengthInUChars = uspoof_getSkeleton(sc, type, inBuf, lengthInUChars, 785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru outBuf, USPOOF_STACK_BUFFER_SIZE, status); 786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (*status == U_BUFFER_OVERFLOW_ERROR) { 787b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru outBuf = static_cast<UChar *>(uprv_malloc((skelLengthInUChars+1)*sizeof(UChar))); 788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (outBuf == NULL) { 789b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto cleanup; 791b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 79250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *status = U_ZERO_ERROR; 79350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho skelLengthInUChars = uspoof_getSkeleton(sc, type, inBuf, lengthInUChars, 79450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho outBuf, skelLengthInUChars+1, status); 795b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 796b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 797b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_strToUTF8(dest, destCapacity, &skelLengthInUTF8, 798b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru outBuf, skelLengthInUChars, status); 799b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 800b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru cleanup: 801b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (inBuf != smallInBuf) { 802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(inBuf); 803b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 804b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (outBuf != smallOutBuf) { 805b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(outBuf); 806b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 807b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return skelLengthInUTF8; 808b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 809b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 810b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 812b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) { 813b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 814b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 815b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(U_FAILURE(*status)); 816b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 817b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 818b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t dataSize = This->fSpoofData->fRawData->fLength; 819b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (capacity < dataSize) { 820b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 821b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return dataSize; 822b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 823b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memcpy(buf, This->fSpoofData->fRawData, dataSize); 824b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return dataSize; 825b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 826b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 827b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 828