1b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/* 2b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru********************************************************************** 3f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* Copyright (C) 2008-2014, International Business Machines 4b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru********************************************************************** 6b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru*/ 7b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 8b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/utypes.h" 9b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uspoof.h" 10b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uchar.h" 11b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "unicode/uniset.h" 12103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 13b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "utrie2.h" 14b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "cmemory.h" 15b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "cstring.h" 168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "identifier_info.h" 178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "scriptset.h" 18b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "umutex.h" 19b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "udataswp.h" 20b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "uassert.h" 21b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "uspoof_impl.h" 22b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 23b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION 24b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 25b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 26b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_NAMESPACE_BEGIN 27b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 28b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl) 29b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 30b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofImpl::SpoofImpl(SpoofData *data, UErrorCode &status) : 318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) , 328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) { 33b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 34b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 35b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 36b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fSpoofData = data; 378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE; 388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff); 408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius allowedCharsSet->freeze(); 418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fAllowedCharsSet = allowedCharsSet; 428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fAllowedLocales = uprv_strdup(""); 438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (fAllowedCharsSet == NULL || fAllowedLocales == NULL) { 44b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fMagic = USPOOF_MAGIC; 48b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 50b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusSpoofImpl::SpoofImpl() : 528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fMagic(USPOOF_MAGIC), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) , 538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) { 54b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff); 55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru allowedCharsSet->freeze(); 56b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAllowedCharsSet = allowedCharsSet; 57b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAllowedLocales = uprv_strdup(""); 588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE; 59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Copy Constructor, used by the user level clone() function. 63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofImpl::SpoofImpl(const SpoofImpl &src, UErrorCode &status) : 648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) , 658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) { 66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fMagic = src.fMagic; 70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fChecks = src.fChecks; 71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (src.fSpoofData != NULL) { 72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fSpoofData = src.fSpoofData->addReference(); 73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAllowedCharsSet = static_cast<const UnicodeSet *>(src.fAllowedCharsSet->clone()); 75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fAllowedCharsSet == NULL) { 76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAllowedLocales = uprv_strdup(src.fAllowedLocales); 798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius fRestrictionLevel = src.fRestrictionLevel; 80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofImpl::~SpoofImpl() { 83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fMagic = 0; // head off application errors by preventing use of 84b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // of deleted objects. 85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fSpoofData != NULL) { 86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fSpoofData->removeReference(); // Will delete if refCount goes to zero. 87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete fAllowedCharsSet; 89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free((void *)fAllowedLocales); 908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete fCachedIdentifierInfo; 91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Incoming parameter check on Status and the SpoofChecker object 95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// received from the C API. 96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruconst SpoofImpl *SpoofImpl::validateThis(const USpoofChecker *sc, UErrorCode &status) { 98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 99b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (sc == NULL) { 102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_ILLEGAL_ARGUMENT_ERROR; 103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 1048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofImpl *This = (SpoofImpl *)sc; 106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This->fMagic != USPOOF_MAGIC || 107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru This->fSpoofData == NULL) { 108b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_INVALID_FORMAT_ERROR; 109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!SpoofData::validateDataVersion(This->fSpoofData->fRawData, status)) { 112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return This; 115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 117b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofImpl *SpoofImpl::validateThis(USpoofChecker *sc, UErrorCode &status) { 118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return const_cast<SpoofImpl *> 119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (SpoofImpl::validateThis(const_cast<const USpoofChecker *>(sc), status)); 120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 121b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//-------------------------------------------------------------------------------------- 125b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// confusableLookup() This is the heart of the confusable skeleton generation 127b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// implementation. 128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 129b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Given a source character, produce the corresponding 1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// replacement character(s), appending them to the dest string. 131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 132b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//--------------------------------------------------------------------------------------- 1338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t SpoofImpl::confusableLookup(UChar32 inChar, int32_t tableMask, UnicodeString &dest) const { 134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 135b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Binary search the spoof data key table for the inChar 136b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *low = fSpoofData->fCFUKeys; 137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *mid = NULL; 138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *limit = low + fSpoofData->fRawData->fCFUKeysSize; 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 midc; 140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru do { 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t delta = ((int32_t)(limit-low))/2; 142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru mid = low + delta; 143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru midc = *mid & 0x1fffff; 144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (inChar == midc) { 145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto foundChar; 146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if (inChar < midc) { 147b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru limit = mid; 148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru low = mid; 150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } while (low < limit-1); 152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru mid = low; 153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru midc = *mid & 0x1fffff; 154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (inChar != midc) { 155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Char not found. It maps to itself. 156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int i = 0; 1578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius dest.append(inChar); 158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return i; 159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru foundChar: 161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t keyFlags = *mid & 0xff000000; 162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((keyFlags & tableMask) == 0) { 163b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // We found the right key char, but the entry doesn't pertain to the 164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // table we need. See if there is an adjacent key that does 165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (keyFlags & USPOOF_KEY_MULTIPLE_VALUES) { 166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *altMid; 167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (altMid = mid-1; (*altMid&0x00ffffff) == inChar; altMid--) { 168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru keyFlags = *altMid & 0xff000000; 169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (keyFlags & tableMask) { 170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru mid = altMid; 171b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto foundKey; 172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 173b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 174b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (altMid = mid+1; (*altMid&0x00ffffff) == inChar; altMid++) { 175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru keyFlags = *altMid & 0xff000000; 176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (keyFlags & tableMask) { 177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru mid = altMid; 178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru goto foundKey; 179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // No key entry for this char & table. 183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The input char maps to itself. 184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int i = 0; 1858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius dest.append(inChar); 186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return i; 187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru foundKey: 190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1; 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t keyTableIndex = (int32_t)(mid - fSpoofData->fCFUKeys); 192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Value is either a UChar (for strings of length 1) or 194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // an index into the string table (for longer strings) 195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint16_t value = fSpoofData->fCFUValues[keyTableIndex]; 196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (stringLen == 1) { 1978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius dest.append((UChar)value); 198b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 1; 199b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // String length of 4 from the above lookup is used for all strings of length >= 4. 202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // For these, get the real length from the string lengths table, 203b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // which maps string table indexes to lengths. 204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // All strings of the same length are stored contiguously in the string table. 205b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 'value' from the lookup above is the starting index for the desired string. 206b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t ix; 208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (stringLen == 4) { 209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize; 210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (ix = 0; ix < stringLengthsLimit; ix++) { 211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) { 212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength; 213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(ix < stringLengthsLimit); 217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho U_ASSERT(value + stringLen <= fSpoofData->fRawData->fCFUStringTableLen); 220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *src = &fSpoofData->fCFUStrings[value]; 2218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius dest.append(src, stringLen); 222b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return stringLen; 223b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//--------------------------------------------------------------------------------------- 227b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// wholeScriptCheck() 229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Input text is already normalized to NFD 231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Return the set of scripts, each of which can represent something that is 232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// confusable with the input text. The script of the input text 233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// is included; input consisting of characters from a single script will 234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// always produce a result consisting of a set containing that script. 235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//--------------------------------------------------------------------------------------- 237b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SpoofImpl::wholeScriptCheck( 2388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius const UnicodeString &text, ScriptSet *result, UErrorCode &status) const { 239b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UTrie2 *table = 241b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (fChecks & USPOOF_ANY_CASE) ? fSpoofData->fAnyCaseTrie : fSpoofData->fLowerCaseTrie; 242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->setAll(); 2438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius int32_t length = text.length(); 2448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for (int32_t inputIdx=0; inputIdx < length;) { 2458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UChar32 c = text.char32At(inputIdx); 2468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius inputIdx += U16_LENGTH(c); 247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t index = utrie2_get32(table, c); 248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (index == 0) { 249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // No confusables in another script for this char. 250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // TODO: we should change the data to have sets with just the single script 251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // bit for the script of this char. Gets rid of this special case. 252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Until then, grab the script from the char and intersect it with the set. 253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UScriptCode cpScript = uscript_getScript(c, &status); 254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(cpScript > USCRIPT_INHERITED); 2558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius result->intersect(cpScript, status); 256b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if (index == 1) { 257b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Script == Common or Inherited. Nothing to do. 258b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 259b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru result->intersect(fSpoofData->fScriptSets[index]); 260b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 261b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 262b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 263b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 265b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SpoofImpl::setAllowedLocales(const char *localesList, UErrorCode &status) { 266b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet allowedChars; 267b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet *tmpSet = NULL; 268b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *locStart = localesList; 269b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *locEnd = NULL; 270b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *localesListEnd = localesList + uprv_strlen(localesList); 271b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t localeListCount = 0; // Number of locales provided by caller. 272b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 273b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Loop runs once per locale from the localesList, a comma separated list of locales. 274b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru do { 275b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru locEnd = uprv_strchr(locStart, ','); 276b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (locEnd == NULL) { 277b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru locEnd = localesListEnd; 278b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 279b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while (*locStart == ' ') { 280b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru locStart++; 281b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 282b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *trimmedEnd = locEnd-1; 283b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while (trimmedEnd > locStart && *trimmedEnd == ' ') { 284b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru trimmedEnd--; 285b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 286b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (trimmedEnd <= locStart) { 287b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 288b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *locale = uprv_strndup(locStart, (int32_t)(trimmedEnd + 1 - locStart)); 290b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru localeListCount++; 291b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 292b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // We have one locale from the locales list. 293b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Add the script chars for this locale to the accumulating set of allowed chars. 294b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If the locale is no good, we will be notified back via status. 295b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru addScriptChars(locale, &allowedChars, status); 296b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free((void *)locale); 297b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 298b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 299b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 300b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru locStart = locEnd + 1; 301b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } while (locStart < localesListEnd); 302b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 303b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If our caller provided an empty list of locales, we disable the allowed characters checking 304b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (localeListCount == 0) { 305b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free((void *)fAllowedLocales); 306b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAllowedLocales = uprv_strdup(""); 307b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tmpSet = new UnicodeSet(0, 0x10ffff); 308b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fAllowedLocales == NULL || tmpSet == NULL) { 309b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 310b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 311b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 312b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tmpSet->freeze(); 313b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete fAllowedCharsSet; 314b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAllowedCharsSet = tmpSet; 315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fChecks &= ~USPOOF_CHAR_LIMIT; 316b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 317b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 318b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 319b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 320b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Add all common and inherited characters to the set of allowed chars. 321b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet tempSet; 322b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status); 323b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru allowedChars.addAll(tempSet); 324b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status); 325b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru allowedChars.addAll(tempSet); 326b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 327b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If anything went wrong, we bail out without changing 328b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // the state of the spoof checker. 329b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 330b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 331b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 332b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 333b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Store the updated spoof checker state. 334b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tmpSet = static_cast<UnicodeSet *>(allowedChars.clone()); 335b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const char *tmpLocalesList = uprv_strdup(localesList); 336b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (tmpSet == NULL || tmpLocalesList == NULL) { 337b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 338b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 339b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 340b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free((void *)fAllowedLocales); 341b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAllowedLocales = tmpLocalesList; 342b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tmpSet->freeze(); 343b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete fAllowedCharsSet; 344b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAllowedCharsSet = tmpSet; 345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fChecks |= USPOOF_CHAR_LIMIT; 346b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 347b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 348b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 349b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruconst char * SpoofImpl::getAllowedLocales(UErrorCode &/*status*/) { 350b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return fAllowedLocales; 351b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 352b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 353b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 354b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Given a locale (a language), add all the characters from all of the scripts used with that language 355b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// to the allowedChars UnicodeSet 356b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 357b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SpoofImpl::addScriptChars(const char *locale, UnicodeSet *allowedChars, UErrorCode &status) { 358b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UScriptCode scripts[30]; 359b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 360b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status); 361b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 362b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 363b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 364b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (status == U_USING_DEFAULT_WARNING) { 365b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_ILLEGAL_ARGUMENT_ERROR; 366b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 367b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 368b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet tmpSet; 369b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t i; 370b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i=0; i<numScripts; i++) { 371b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status); 372b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru allowedChars->addAll(tmpSet); 373b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 374b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 375b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 376b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 377b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Convert a text format hex number. Utility function used by builder code. Static. 378b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Input: UChar *string text. Output: a UChar32 379b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Input has been pre-checked, and will have no non-hex chars. 380b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// The number must fall in the code point range of 0..0x10ffff 381b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Static Function. 382b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruUChar32 SpoofImpl::ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status) { 383b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 384b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 385b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 386b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(limit-start > 0); 387b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t val = 0; 388b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int i; 389b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (i=start; i<limit; i++) { 390b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int digitVal = s[i] - 0x30; 391b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (digitVal>9) { 392b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru digitVal = 0xa + (s[i] - 0x41); // Upper Case 'A' 393b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 394b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (digitVal>15) { 395b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru digitVal = 0xa + (s[i] - 0x61); // Lower Case 'a' 396b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 397b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(digitVal <= 0xf); 398b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru val <<= 4; 399b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru val += digitVal; 400b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 401b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (val > 0x10ffff) { 402b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_PARSE_ERROR; 403b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru val = 0; 404b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 405b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return (UChar32)val; 406b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 407b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 4088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create. 4098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// Maintain a one-element cache, which is sufficient to avoid repeatedly 4108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// creating new ones unless we get multi-thread concurrency in spoof 4118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// check operations, which should be statistically uncommon. 4128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// These functions are used in place of new & delete of an IdentifierInfo. 4148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// They will recycle the IdentifierInfo when possible. 4158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// They are logically const, and used within const functions that must be thread safe. 4168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusIdentifierInfo *SpoofImpl::getIdentifierInfo(UErrorCode &status) const { 4178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius IdentifierInfo *returnIdInfo = NULL; 4188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status)) { 4198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return returnIdInfo; 4208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius SpoofImpl *nonConstThis = const_cast<SpoofImpl *>(this); 4228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius { 4238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius Mutex m; 4248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius returnIdInfo = nonConstThis->fCachedIdentifierInfo; 4258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius nonConstThis->fCachedIdentifierInfo = NULL; 4268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (returnIdInfo == NULL) { 4288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius returnIdInfo = new IdentifierInfo(status); 4298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_SUCCESS(status) && returnIdInfo == NULL) { 4308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 4318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (U_FAILURE(status) && returnIdInfo != NULL) { 4338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete returnIdInfo; 4348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius returnIdInfo = NULL; 4358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return returnIdInfo; 4388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 4398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid SpoofImpl::releaseIdentifierInfo(IdentifierInfo *idInfo) const { 4428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (idInfo != NULL) { 4438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius SpoofImpl *nonConstThis = const_cast<SpoofImpl *>(this); 4448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius { 4458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius Mutex m; 4468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (nonConstThis->fCachedIdentifierInfo == NULL) { 4478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius nonConstThis->fCachedIdentifierInfo = idInfo; 4488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius idInfo = NULL; 4498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete idInfo; 4528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 4548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 456b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 457b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 458b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//---------------------------------------------------------------------------------------------- 459b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 460b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// class SpoofData Implementation 461b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 462b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//---------------------------------------------------------------------------------------------- 463b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 464b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 465b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruUBool SpoofData::validateDataVersion(const SpoofDataHeader *rawData, UErrorCode &status) { 466b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status) || 467b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru rawData == NULL || 468b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru rawData->fMagic != USPOOF_MAGIC || 469b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru rawData->fFormatVersion[0] > 1 || 470b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru rawData->fFormatVersion[1] > 0) { 471b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_INVALID_FORMAT_ERROR; 472b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return FALSE; 473b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 474b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return TRUE; 475b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 476b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 477f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UBool U_CALLCONV 478f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusspoofDataIsAcceptable(void *context, 479f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const char * /* type */, const char * /*name*/, 480f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UDataInfo *pInfo) { 481f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( 482f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pInfo->size >= 20 && 483f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pInfo->isBigEndian == U_IS_BIG_ENDIAN && 484f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pInfo->charsetFamily == U_CHARSET_FAMILY && 485f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pInfo->dataFormat[0] == 0x43 && // dataFormat="Cfu " 486f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pInfo->dataFormat[1] == 0x66 && 487f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pInfo->dataFormat[2] == 0x75 && 488f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pInfo->dataFormat[3] == 0x20 && 489f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pInfo->formatVersion[0] == 1 490f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 491f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UVersionInfo *version = static_cast<UVersionInfo *>(context); 492f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(version != NULL) { 493f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_memcpy(version, pInfo->dataVersion, 4); 494f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 495f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 496f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 497f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 498f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 499f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 500f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 501b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 502b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// SpoofData::getDefault() - return a wrapper around the spoof data that is 503b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// baked into the default ICU data. 504b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 505b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofData *SpoofData::getDefault(UErrorCode &status) { 506b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // TODO: Cache it. Lazy create, keep until cleanup. 507b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 508f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UDataMemory *udm = udata_openChoice(NULL, "cfu", "confusables", 509f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius spoofDataIsAcceptable, 510f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, // context, would receive dataVersion if supplied. 511f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &status); 512b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 513b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 514b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofData *This = new SpoofData(udm, status); 516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 517b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete This; 518b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 519b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 520b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (This == NULL) { 521b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 522b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 523b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return This; 524b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 525b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 526b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofData::SpoofData(UDataMemory *udm, UErrorCode &status) 527b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 528b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reset(); 529b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 530b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 531b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 532b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fUDM = udm; 533f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius // fRawData is non-const because it may be constructed by the data builder. 534f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius fRawData = reinterpret_cast<SpoofDataHeader *>( 535f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const_cast<void *>(udata_getMemory(udm))); 536b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru validateDataVersion(fRawData, status); 537b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru initPtrs(status); 538b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 539b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 540b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 541b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofData::SpoofData(const void *data, int32_t length, UErrorCode &status) 542b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru{ 543b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reset(); 544b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 545b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 546b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 547b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((size_t)length < sizeof(SpoofDataHeader)) { 548b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_INVALID_FORMAT_ERROR; 549b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 550b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 551b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void *ncData = const_cast<void *>(data); 552b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData = static_cast<SpoofDataHeader *>(ncData); 553b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < fRawData->fLength) { 554b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_INVALID_FORMAT_ERROR; 555b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 556b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 557b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru validateDataVersion(fRawData, status); 558b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru initPtrs(status); 559b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 560b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 561b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Spoof Data constructor for use from data builder. 563b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Initializes a new, empty data area that will be populated later. 564b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofData::SpoofData(UErrorCode &status) { 565b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reset(); 566b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 567b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 568b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 569b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fDataOwned = true; 570b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRefCount = 1; 571b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 572b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // The spoof header should already be sized to be a multiple of 16 bytes. 573b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Just in case it's not, round it up. 574b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t initialSize = (sizeof(SpoofDataHeader) + 15) & ~15; 575b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(initialSize == sizeof(SpoofDataHeader)); 576b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 577b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData = static_cast<SpoofDataHeader *>(uprv_malloc(initialSize)); 578b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fMemLimit = initialSize; 579b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fRawData == NULL) { 580b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 581b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 582b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memset(fRawData, 0, initialSize); 584b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 585b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData->fMagic = USPOOF_MAGIC; 586b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData->fFormatVersion[0] = 1; 587b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData->fFormatVersion[1] = 0; 588b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData->fFormatVersion[2] = 0; 589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData->fFormatVersion[3] = 0; 590b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru initPtrs(status); 591b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 592b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 593b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// reset() - initialize all fields. 594b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Should be updated if any new fields are added. 595b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Called by constructors to put things in a known initial state. 596b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SpoofData::reset() { 597b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData = NULL; 598b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fDataOwned = FALSE; 599b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fUDM = NULL; 600b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fMemLimit = 0; 601b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRefCount = 1; 602b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUKeys = NULL; 603b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUValues = NULL; 604b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUStringLengths = NULL; 605b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUStrings = NULL; 606b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAnyCaseTrie = NULL; 607b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fLowerCaseTrie = NULL; 608b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fScriptSets = NULL; 609b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 610b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 611b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 612b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// SpoofData::initPtrs() 613b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Initialize the pointers to the various sections of the raw data. 614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 615b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// This function is used both during the Trie building process (multiple 616b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// times, as the individual data sections are added), and 617b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// during the opening of a Spoof Checker from prebuilt data. 618b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 619b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// The pointers for non-existent data sections (identified by an offset of 0) 620b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// are set to NULL. 621b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 622b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// Note: During building the data, adding each new data section 623b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// reallocs the raw data area, which likely relocates it, which 624b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// in turn requires reinitializing all of the pointers into it, hence 625b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// multiple calls to this function during building. 626b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 627b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SpoofData::initPtrs(UErrorCode &status) { 628b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUKeys = NULL; 629b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUValues = NULL; 630b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUStringLengths = NULL; 631b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUStrings = NULL; 632b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 633b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 634b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 635b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fRawData->fCFUKeys != 0) { 636b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUKeys = (int32_t *)((char *)fRawData + fRawData->fCFUKeys); 637b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 638b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fRawData->fCFUStringIndex != 0) { 639b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUValues = (uint16_t *)((char *)fRawData + fRawData->fCFUStringIndex); 640b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 641b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fRawData->fCFUStringLengths != 0) { 642b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUStringLengths = (SpoofStringLengthsElement *)((char *)fRawData + fRawData->fCFUStringLengths); 643b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 644b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fRawData->fCFUStringTable != 0) { 645b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fCFUStrings = (UChar *)((char *)fRawData + fRawData->fCFUStringTable); 646b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 647b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 648b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fAnyCaseTrie == NULL && fRawData->fAnyCaseTrie != 0) { 649b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAnyCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 650b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (char *)fRawData + fRawData->fAnyCaseTrie, fRawData->fAnyCaseTrieLength, NULL, &status); 651b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 652b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fLowerCaseTrie == NULL && fRawData->fLowerCaseTrie != 0) { 653b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fLowerCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 654b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (char *)fRawData + fRawData->fLowerCaseTrie, fRawData->fLowerCaseTrieLength, NULL, &status); 655b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 656b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 657b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fRawData->fScriptSets != 0) { 658b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fScriptSets = (ScriptSet *)((char *)fRawData + fRawData->fScriptSets); 659b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 660b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 661b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 662b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 663b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofData::~SpoofData() { 664b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru utrie2_close(fAnyCaseTrie); 665b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fAnyCaseTrie = NULL; 666b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru utrie2_close(fLowerCaseTrie); 667b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fLowerCaseTrie = NULL; 668b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fDataOwned) { 669b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_free(fRawData); 670b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 671b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData = NULL; 672b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (fUDM != NULL) { 673b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_close(fUDM); 674b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 675b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fUDM = NULL; 676b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 677b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 678b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 679b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid SpoofData::removeReference() { 680b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (umtx_atomic_dec(&fRefCount) == 0) { 681b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru delete this; 682b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 683b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 684b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 685b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 686b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruSpoofData *SpoofData::addReference() { 687b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru umtx_atomic_inc(&fRefCount); 688b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return this; 689b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 690b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 691b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 692b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruvoid *SpoofData::reserveSpace(int32_t numBytes, UErrorCode &status) { 693b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(status)) { 694b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 695b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 696b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!fDataOwned) { 697b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_ASSERT(FALSE); 698b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru status = U_INTERNAL_PROGRAM_ERROR; 699b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 701b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 702b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru numBytes = (numBytes + 15) & ~15; // Round up to a multiple of 16 703b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t returnOffset = fMemLimit; 704b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fMemLimit += numBytes; 705b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData = static_cast<SpoofDataHeader *>(uprv_realloc(fRawData, fMemLimit)); 706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fRawData->fLength = fMemLimit; 707b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memset((char *)fRawData + returnOffset, 0, numBytes); 708b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru initPtrs(status); 709b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return (char *)fRawData + returnOffset; 710b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 711b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 712b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 713b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_NAMESPACE_END 714b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 715b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_NAMESPACE_USE 716b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 717b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//----------------------------------------------------------------------------- 718b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 719b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// uspoof_swap - byte swap and char encoding swap of spoof data 720b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 721b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//----------------------------------------------------------------------------- 722b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI int32_t U_EXPORT2 723b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruuspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, 724b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *status) { 725b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 726b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (status == NULL || U_FAILURE(*status)) { 727b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 728b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 729b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { 730b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status=U_ILLEGAL_ARGUMENT_ERROR; 731b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 732b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 733b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 734b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 735b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Check that the data header is for spoof data. 736b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // (Header contents are defined in gencfu.cpp) 737b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 738b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4); 739b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="Cfu " */ 740b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[1]==0x66 && 741b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[2]==0x75 && 742b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[3]==0x20 && 743b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->formatVersion[0]==1 )) { 744b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x " 745b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru "(format version %02x %02x %02x %02x) is not recognized\n", 746b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[0], pInfo->dataFormat[1], 747b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->dataFormat[2], pInfo->dataFormat[3], 748b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->formatVersion[0], pInfo->formatVersion[1], 749b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pInfo->formatVersion[2], pInfo->formatVersion[3]); 750b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status=U_UNSUPPORTED_ERROR; 751b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 752b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 753b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 754b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 755b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Swap the data header. (This is the generic ICU Data Header, not the uspoof Specific 756b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // header). This swap also conveniently gets us 757b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // the size of the ICU d.h., which lets us locate the start 758b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // of the uspoof specific data. 759b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 760b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status); 761b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 762b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 763b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 764b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Get the Spoof Data Header, and check that it appears to be OK. 765b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 766b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 767b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const uint8_t *inBytes =(const uint8_t *)inData+headerSize; 768b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofDataHeader *spoofDH = (SpoofDataHeader *)inBytes; 769b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (ds->readUInt32(spoofDH->fMagic) != USPOOF_MAGIC || 770b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->readUInt32(spoofDH->fLength) < sizeof(SpoofDataHeader)) 771b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru { 772b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n"); 773b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status=U_UNSUPPORTED_ERROR; 774b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 775b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 776b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 777b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 778b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Prefight operation? Just return the size 779b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 780b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t spoofDataLength = ds->readUInt32(spoofDH->fLength); 781b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t totalSize = headerSize + spoofDataLength; 782b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < 0) { 783b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return totalSize; 784b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 787b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Check that length passed in is consistent with length from Spoof data header. 788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 789b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (length < totalSize) { 790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n", 791b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru spoofDataLength); 792b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *status=U_INDEX_OUTOFBOUNDS_ERROR; 793b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return 0; 794b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 795b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 796b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 797b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 798b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Swap the Data. Do the data itself first, then the Spoof Data Header, because 799b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // we need to reference the header to locate the data, and an 800b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // inplace swap of the header leaves it unusable. 801b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 802b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint8_t *outBytes = (uint8_t *)outData + headerSize; 803b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru SpoofDataHeader *outputDH = (SpoofDataHeader *)outBytes; 804b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 805b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t sectionStart; 806b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t sectionLength; 807b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 808b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 809b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // If not swapping in place, zero out the output buffer before starting. 810b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Gaps may exist between the individual sections, and these must be zeroed in 811b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // the output buffer. The simplest way to do that is to just zero the whole thing. 812b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 813b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (inBytes != outBytes) { 814b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uprv_memset(outBytes, 0, spoofDataLength); 815b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 816b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 817b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Confusables Keys Section (fCFUKeys) 818b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionStart = ds->readUInt32(spoofDH->fCFUKeys); 819b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4; 820b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 821b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 822b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // String Index Section 823b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionStart = ds->readUInt32(spoofDH->fCFUStringIndex); 824b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2; 825b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 826b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 827b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // String Table Section 828b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionStart = ds->readUInt32(spoofDH->fCFUStringTable); 829b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2; 830b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 831b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 832b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // String Lengths Section 833b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionStart = ds->readUInt32(spoofDH->fCFUStringLengths); 834b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionLength = ds->readUInt32(spoofDH->fCFUStringLengthsSize) * 4; 835b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 836b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 837b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Any Case Trie 838b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionStart = ds->readUInt32(spoofDH->fAnyCaseTrie); 839b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionLength = ds->readUInt32(spoofDH->fAnyCaseTrieLength); 840b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 841b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 842b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Lower Case Trie 843b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionStart = ds->readUInt32(spoofDH->fLowerCaseTrie); 844b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionLength = ds->readUInt32(spoofDH->fLowerCaseTrieLength); 845b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 846b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 847b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Script Sets. The data is an array of int32_t 848b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru sectionStart = ds->readUInt32(spoofDH->fScriptSets); 84927f654740f2a26ad62a5c155af9199af9e69b889claireho sectionLength = ds->readUInt32(spoofDH->fScriptSetsLength) * sizeof(ScriptSet); 850b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 851b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 852b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // And, last, swap the header itself. 853b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // int32_t fMagic // swap this 85427f654740f2a26ad62a5c155af9199af9e69b889claireho // uint8_t fFormatVersion[4] // Do not swap this, just copy 85527f654740f2a26ad62a5c155af9199af9e69b889claireho // int32_t fLength and all the rest // Swap the rest, all is 32 bit stuff. 856b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // 857b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t magic = ds->readUInt32(spoofDH->fMagic); 858b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic); 859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (outputDH->fFormatVersion != spoofDH->fFormatVersion) { 861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion)); 862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 86327f654740f2a26ad62a5c155af9199af9e69b889claireho // swap starting at fLength 86427f654740f2a26ad62a5c155af9199af9e69b889claireho ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8 /* minus magic and fFormatVersion[4] */, &outputDH->fLength, status); 865b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 866b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return totalSize; 867b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 868b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 869b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#endif 870b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 871b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 872