1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 2008-2010, International Business Machines 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uspoof.h" 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unorm.h" 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h" 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uniset.h" 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie2.h" 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cstring.h" 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "udatamem.h" 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "umutex.h" 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "udataswp.h" 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uassert.h" 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uspoof_impl.h" 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_NORMALIZATION 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl) 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofImpl::SpoofImpl(SpoofData *data, UErrorCode &status) : 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMagic(0), fSpoofData(NULL), fAllowedCharsSet(NULL) , fAllowedLocales(NULL) { 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMagic = USPOOF_MAGIC; 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSpoofData = data; 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fChecks = USPOOF_ALL_CHECKS; 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff); 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (allowedCharsSet == NULL) { 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) allowedCharsSet->freeze(); 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedCharsSet = allowedCharsSet; 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedLocales = uprv_strdup(""); 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofImpl::SpoofImpl() { 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMagic = USPOOF_MAGIC; 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSpoofData = NULL; 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fChecks = USPOOF_ALL_CHECKS; 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff); 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) allowedCharsSet->freeze(); 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedCharsSet = allowedCharsSet; 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedLocales = uprv_strdup(""); 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Copy Constructor, used by the user level clone() function. 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofImpl::SpoofImpl(const SpoofImpl &src, UErrorCode &status) : 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMagic(0), fSpoofData(NULL), fAllowedCharsSet(NULL) { 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMagic = src.fMagic; 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fChecks = src.fChecks; 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (src.fSpoofData != NULL) { 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSpoofData = src.fSpoofData->addReference(); 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCheckMask = src.fCheckMask; 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedCharsSet = static_cast<const UnicodeSet *>(src.fAllowedCharsSet->clone()); 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fAllowedCharsSet == NULL) { 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedLocales = uprv_strdup(src.fAllowedLocales); 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofImpl::~SpoofImpl() { 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMagic = 0; // head off application errors by preventing use of 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // of deleted objects. 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSpoofData != NULL) { 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fSpoofData->removeReference(); // Will delete if refCount goes to zero. 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fAllowedCharsSet; 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free((void *)fAllowedLocales); 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Incoming parameter check on Status and the SpoofChecker object 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// received from the C API. 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const SpoofImpl *SpoofImpl::validateThis(const USpoofChecker *sc, UErrorCode &status) { 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (sc == NULL) { 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ILLEGAL_ARGUMENT_ERROR; 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpoofImpl *This = (SpoofImpl *)sc; 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (This->fMagic != USPOOF_MAGIC || 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) This->fSpoofData == NULL) { 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_INVALID_FORMAT_ERROR; 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!SpoofData::validateDataVersion(This->fSpoofData->fRawData, status)) { 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return This; 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofImpl *SpoofImpl::validateThis(USpoofChecker *sc, UErrorCode &status) { 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return const_cast<SpoofImpl *> 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (SpoofImpl::validateThis(const_cast<const USpoofChecker *>(sc), status)); 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------------------- 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// confusableLookup() This is the heart of the confusable skeleton generation 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// implementation. 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Given a source character, produce the corresponding 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// replacement character(s) 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------- 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t SpoofImpl::confusableLookup(UChar32 inChar, int32_t tableMask, UChar *destBuf) const { 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Binary search the spoof data key table for the inChar 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *low = fSpoofData->fCFUKeys; 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *mid = NULL; 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *limit = low + fSpoofData->fRawData->fCFUKeysSize; 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 midc; 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t delta = ((int32_t)(limit-low))/2; 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mid = low + delta; 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) midc = *mid & 0x1fffff; 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (inChar == midc) { 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto foundChar; 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (inChar < midc) { 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit = mid; 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) low = mid; 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while (low < limit-1); 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mid = low; 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) midc = *mid & 0x1fffff; 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (inChar != midc) { 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Char not found. It maps to itself. 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i = 0; 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_APPEND_UNSAFE(destBuf, i, inChar) 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return i; 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) foundChar: 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t keyFlags = *mid & 0xff000000; 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((keyFlags & tableMask) == 0) { 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We found the right key char, but the entry doesn't pertain to the 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // table we need. See if there is an adjacent key that does 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (keyFlags & USPOOF_KEY_MULTIPLE_VALUES) { 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *altMid; 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (altMid = mid-1; (*altMid&0x00ffffff) == inChar; altMid--) { 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) keyFlags = *altMid & 0xff000000; 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (keyFlags & tableMask) { 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mid = altMid; 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto foundKey; 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (altMid = mid+1; (*altMid&0x00ffffff) == inChar; altMid++) { 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) keyFlags = *altMid & 0xff000000; 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (keyFlags & tableMask) { 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mid = altMid; 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto foundKey; 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No key entry for this char & table. 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The input char maps to itself. 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i = 0; 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_APPEND_UNSAFE(destBuf, i, inChar) 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return i; 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) foundKey: 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1; 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t keyTableIndex = (int32_t)(mid - fSpoofData->fCFUKeys); 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Value is either a UChar (for strings of length 1) or 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // an index into the string table (for longer strings) 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t value = fSpoofData->fCFUValues[keyTableIndex]; 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (stringLen == 1) { 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) destBuf[0] = value; 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 1; 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // String length of 4 from the above lookup is used for all strings of length >= 4. 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // For these, get the real length from the string lengths table, 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // which maps string table indexes to lengths. 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // All strings of the same length are stored contiguously in the string table. 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 'value' from the lookup above is the starting index for the desired string. 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t ix; 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (stringLen == 4) { 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize; 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (ix = 0; ix < stringLengthsLimit; ix++) { 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) { 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength; 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(ix < stringLengthsLimit); 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(value + stringLen < fSpoofData->fRawData->fCFUStringTableLen); 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *src = &fSpoofData->fCFUStrings[value]; 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (ix=0; ix<stringLen; ix++) { 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) destBuf[ix] = src[ix]; 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return stringLen; 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------- 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// wholeScriptCheck() 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Input text is already normalized to NFKD 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Return the set of scripts, each of which can represent something that is 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// confusable with the input text. The script of the input text 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// is included; input consisting of characters from a single script will 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// always produce a result consisting of a set containing that script. 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//--------------------------------------------------------------------------------------- 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void SpoofImpl::wholeScriptCheck( 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *text, int32_t length, ScriptSet *result, UErrorCode &status) const { 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t inputIdx = 0; 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrie2 *table = 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fChecks & USPOOF_ANY_CASE) ? fSpoofData->fAnyCaseTrie : fSpoofData->fLowerCaseTrie; 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result->setAll(); 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (inputIdx < length) { 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_NEXT(text, inputIdx, length, c); 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t index = utrie2_get32(table, c); 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (index == 0) { 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // No confusables in another script for this char. 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: we should change the data to have sets with just the single script 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // bit for the script of this char. Gets rid of this special case. 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Until then, grab the script from the char and intersect it with the set. 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UScriptCode cpScript = uscript_getScript(c, &status); 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(cpScript > USCRIPT_INHERITED); 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result->intersect(cpScript); 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (index == 1) { 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Script == Common or Inherited. Nothing to do. 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result->intersect(fSpoofData->fScriptSets[index]); 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void SpoofImpl::setAllowedLocales(const char *localesList, UErrorCode &status) { 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet allowedChars; 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *tmpSet = NULL; 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *locStart = localesList; 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *locEnd = NULL; 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *localesListEnd = localesList + uprv_strlen(localesList); 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t localeListCount = 0; // Number of locales provided by caller. 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Loop runs once per locale from the localesList, a comma separated list of locales. 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) locEnd = uprv_strchr(locStart, ','); 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (locEnd == NULL) { 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) locEnd = localesListEnd; 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (*locStart == ' ') { 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) locStart++; 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *trimmedEnd = locEnd-1; 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (trimmedEnd > locStart && *trimmedEnd == ' ') { 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trimmedEnd--; 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (trimmedEnd <= locStart) { 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *locale = uprv_strndup(locStart, (int32_t)(trimmedEnd + 1 - locStart)); 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) localeListCount++; 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We have one locale from the locales list. 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Add the script chars for this locale to the accumulating set of allowed chars. 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If the locale is no good, we will be notified back via status. 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) addScriptChars(locale, &allowedChars, status); 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free((void *)locale); 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) locStart = locEnd + 1; 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while (locStart < localesListEnd); 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If our caller provided an empty list of locales, we disable the allowed characters checking 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (localeListCount == 0) { 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free((void *)fAllowedLocales); 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedLocales = uprv_strdup(""); 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tmpSet = new UnicodeSet(0, 0x10ffff); 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fAllowedLocales == NULL || tmpSet == NULL) { 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tmpSet->freeze(); 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fAllowedCharsSet; 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedCharsSet = tmpSet; 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCheckMask &= ~USPOOF_CHAR_LIMIT; 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Add all common and inherited characters to the set of allowed chars. 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet tempSet; 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status); 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) allowedChars.addAll(tempSet); 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status); 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) allowedChars.addAll(tempSet); 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If anything went wrong, we bail out without changing 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the state of the spoof checker. 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Store the updated spoof checker state. 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tmpSet = static_cast<UnicodeSet *>(allowedChars.clone()); 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *tmpLocalesList = uprv_strdup(localesList); 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (tmpSet == NULL || tmpLocalesList == NULL) { 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free((void *)fAllowedLocales); 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedLocales = tmpLocalesList; 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tmpSet->freeze(); 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fAllowedCharsSet; 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAllowedCharsSet = tmpSet; 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCheckMask |= USPOOF_CHAR_LIMIT; 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const char * SpoofImpl::getAllowedLocales(UErrorCode &/*status*/) { 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fAllowedLocales; 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Given a locale (a language), add all the characters from all of the scripts used with that language 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// to the allowedChars UnicodeSet 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void SpoofImpl::addScriptChars(const char *locale, UnicodeSet *allowedChars, UErrorCode &status) { 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UScriptCode scripts[30]; 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status); 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (status == U_USING_DEFAULT_WARNING) { 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ILLEGAL_ARGUMENT_ERROR; 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet tmpSet; 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i; 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<numScripts; i++) { 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status); 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) allowedChars->addAll(tmpSet); 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t SpoofImpl::scriptScan 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (const UChar *text, int32_t length, int32_t &pos, UErrorCode &status) const { 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t inputIdx = 0; 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t scriptCount = 0; 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UScriptCode lastScript = USCRIPT_INVALID_CODE; 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UScriptCode sc = USCRIPT_INVALID_CODE; 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while ((inputIdx < length || length == -1) && scriptCount < 2) { 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_NEXT(text, inputIdx, length, c); 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (c == 0 && length == -1) { 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sc = uscript_getScript(c, &status); 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (sc == USCRIPT_COMMON || sc == USCRIPT_INHERITED || sc == USCRIPT_UNKNOWN) { 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (sc != lastScript) { 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) scriptCount++; 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) lastScript = sc; 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (scriptCount == 2) { 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos = inputIdx; 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return scriptCount; 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Convert a text format hex number. Utility function used by builder code. Static. 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Input: UChar *string text. Output: a UChar32 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Input has been pre-checked, and will have no non-hex chars. 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The number must fall in the code point range of 0..0x10ffff 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Static Function. 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 SpoofImpl::ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status) { 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(limit-start > 0); 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t val = 0; 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int i; 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=start; i<limit; i++) { 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int digitVal = s[i] - 0x30; 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (digitVal>9) { 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) digitVal = 0xa + (s[i] - 0x41); // Upper Case 'A' 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (digitVal>15) { 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) digitVal = 0xa + (s[i] - 0x61); // Lower Case 'a' 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(digitVal <= 0xf); 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) val <<= 4; 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) val += digitVal; 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (val > 0x10ffff) { 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_PARSE_ERROR; 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) val = 0; 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (UChar32)val; 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------------------------- 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// class SpoofData Implementation 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------------------------- 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool SpoofData::validateDataVersion(const SpoofDataHeader *rawData, UErrorCode &status) { 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status) || 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rawData == NULL || 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rawData->fMagic != USPOOF_MAGIC || 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rawData->fFormatVersion[0] > 1 || 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rawData->fFormatVersion[1] > 0) { 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_INVALID_FORMAT_ERROR; 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// SpoofData::getDefault() - return a wrapper around the spoof data that is 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// baked into the default ICU data. 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofData *SpoofData::getDefault(UErrorCode &status) { 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // TODO: Cache it. Lazy create, keep until cleanup. 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UDataMemory *udm = udata_open(NULL, "cfu", "confusables", &status); 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpoofData *This = new SpoofData(udm, status); 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete This; 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (This == NULL) { 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return This; 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofData::SpoofData(UDataMemory *udm, UErrorCode &status) 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData = reinterpret_cast<SpoofDataHeader *> 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((char *)(udm->pHeader) + udm->pHeader->dataHeader.headerSize); 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUDM = udm; 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) validateDataVersion(fRawData, status); 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initPtrs(status); 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofData::SpoofData(const void *data, int32_t length, UErrorCode &status) 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((size_t)length < sizeof(SpoofDataHeader)) { 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_INVALID_FORMAT_ERROR; 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void *ncData = const_cast<void *>(data); 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData = static_cast<SpoofDataHeader *>(ncData); 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (length < fRawData->fLength) { 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_INVALID_FORMAT_ERROR; 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) validateDataVersion(fRawData, status); 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initPtrs(status); 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Spoof Data constructor for use from data builder. 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Initializes a new, empty data area that will be populated later. 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofData::SpoofData(UErrorCode &status) { 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDataOwned = true; 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRefCount = 1; 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // The spoof header should already be sized to be a multiple of 16 bytes. 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Just in case it's not, round it up. 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t initialSize = (sizeof(SpoofDataHeader) + 15) & ~15; 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(initialSize == sizeof(SpoofDataHeader)); 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData = static_cast<SpoofDataHeader *>(uprv_malloc(initialSize)); 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMemLimit = initialSize; 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fRawData == NULL) { 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(fRawData, 0, initialSize); 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData->fMagic = USPOOF_MAGIC; 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData->fFormatVersion[0] = 1; 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData->fFormatVersion[1] = 0; 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData->fFormatVersion[2] = 0; 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData->fFormatVersion[3] = 0; 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initPtrs(status); 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// reset() - initialize all fields. 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Should be updated if any new fields are added. 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Called by constructors to put things in a known initial state. 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void SpoofData::reset() { 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData = NULL; 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fDataOwned = FALSE; 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUDM = NULL; 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMemLimit = 0; 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRefCount = 1; 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUKeys = NULL; 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUValues = NULL; 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUStringLengths = NULL; 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUStrings = NULL; 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAnyCaseTrie = NULL; 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLowerCaseTrie = NULL; 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fScriptSets = NULL; 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// SpoofData::initPtrs() 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Initialize the pointers to the various sections of the raw data. 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This function is used both during the Trie building process (multiple 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// times, as the individual data sections are added), and 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// during the opening of a Spoof Checker from prebuilt data. 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// The pointers for non-existent data sections (identified by an offset of 0) 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// are set to NULL. 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Note: During building the data, adding each new data section 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// reallocs the raw data area, which likely relocates it, which 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// in turn requires reinitializing all of the pointers into it, hence 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// multiple calls to this function during building. 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void SpoofData::initPtrs(UErrorCode &status) { 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUKeys = NULL; 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUValues = NULL; 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUStringLengths = NULL; 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUStrings = NULL; 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fRawData->fCFUKeys != 0) { 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUKeys = (int32_t *)((char *)fRawData + fRawData->fCFUKeys); 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fRawData->fCFUStringIndex != 0) { 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUValues = (uint16_t *)((char *)fRawData + fRawData->fCFUStringIndex); 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fRawData->fCFUStringLengths != 0) { 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUStringLengths = (SpoofStringLengthsElement *)((char *)fRawData + fRawData->fCFUStringLengths); 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fRawData->fCFUStringTable != 0) { 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fCFUStrings = (UChar *)((char *)fRawData + fRawData->fCFUStringTable); 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fAnyCaseTrie == NULL && fRawData->fAnyCaseTrie != 0) { 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAnyCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (char *)fRawData + fRawData->fAnyCaseTrie, fRawData->fAnyCaseTrieLength, NULL, &status); 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fLowerCaseTrie == NULL && fRawData->fLowerCaseTrie != 0) { 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLowerCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (char *)fRawData + fRawData->fLowerCaseTrie, fRawData->fLowerCaseTrieLength, NULL, &status); 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fRawData->fScriptSets != 0) { 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fScriptSets = (ScriptSet *)((char *)fRawData + fRawData->fScriptSets); 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofData::~SpoofData() { 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_close(fAnyCaseTrie); 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fAnyCaseTrie = NULL; 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_close(fLowerCaseTrie); 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fLowerCaseTrie = NULL; 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fDataOwned) { 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(fRawData); 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData = NULL; 625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fUDM != NULL) { 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) udata_close(fUDM); 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUDM = NULL; 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void SpoofData::removeReference() { 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (umtx_atomic_dec(&fRefCount) == 0) { 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete this; 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)SpoofData *SpoofData::addReference() { 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) umtx_atomic_inc(&fRefCount); 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return this; 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void *SpoofData::reserveSpace(int32_t numBytes, UErrorCode &status) { 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!fDataOwned) { 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(FALSE); 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_INTERNAL_PROGRAM_ERROR; 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) numBytes = (numBytes + 15) & ~15; // Round up to a multiple of 16 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t returnOffset = fMemLimit; 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fMemLimit += numBytes; 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData = static_cast<SpoofDataHeader *>(uprv_realloc(fRawData, fMemLimit)); 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fRawData->fLength = fMemLimit; 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset((char *)fRawData + returnOffset, 0, numBytes); 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initPtrs(status); 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (char *)fRawData + returnOffset; 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------- 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// ScriptSet implementation 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//---------------------------------------------------------------------------- 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ScriptSet::ScriptSet() { 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (uint32_t i=0; i<sizeof(bits)/sizeof(uint32_t); i++) { 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[i] = 0; 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ScriptSet::~ScriptSet() { 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool ScriptSet::operator == (const ScriptSet &other) { 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (uint32_t i=0; i<sizeof(bits)/sizeof(uint32_t); i++) { 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (bits[i] != other.bits[i]) { 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ScriptSet::Union(UScriptCode script) { 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t index = script / 32; 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t bit = 1 << (script & 31); 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(index < sizeof(bits)*4); 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[index] |= bit; 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ScriptSet::Union(const ScriptSet &other) { 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (uint32_t i=0; i<sizeof(bits)/sizeof(uint32_t); i++) { 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[i] |= other.bits[i]; 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ScriptSet::intersect(const ScriptSet &other) { 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (uint32_t i=0; i<sizeof(bits)/sizeof(uint32_t); i++) { 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[i] &= other.bits[i]; 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ScriptSet::intersect(UScriptCode script) { 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t index = script / 32; 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t bit = 1 << (script & 31); 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_ASSERT(index < sizeof(bits)*4); 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t i; 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<index; i++) { 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[i] = 0; 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[index] &= bit; 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=index+1; i<sizeof(bits)/sizeof(uint32_t); i++) { 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[i] = 0; 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (uint32_t i=0; i<sizeof(bits)/sizeof(uint32_t); i++) { 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[i] = other.bits[i]; 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return *this; 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ScriptSet::setAll() { 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (uint32_t i=0; i<sizeof(bits)/sizeof(uint32_t); i++) { 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[i] = 0xffffffffu; 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void ScriptSet::resetAll() { 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (uint32_t i=0; i<sizeof(bits)/sizeof(uint32_t); i++) { 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bits[i] = 0; 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t ScriptSet::countMembers() { 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This bit counter is good for sparse numbers of '1's, which is 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // very much the case that we will usually have. 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count = 0; 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (uint32_t i=0; i<sizeof(bits)/sizeof(uint32_t); i++) { 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t x = bits[i]; 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (x > 0) { 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count++; 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) x &= (x - 1); // and off the least significant one bit. 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return count; 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------- 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// NFKDBuffer Implementation. 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------- 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)NFKDBuffer::NFKDBuffer(const UChar *text, int32_t length, UErrorCode &status) { 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNormalizedText = NULL; 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNormalizedTextLength = 0; 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOriginalText = text; 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNormalizedText = fSmallBuf; 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNormalizedTextLength = unorm_normalize( 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text, length, UNORM_NFKD, 0, fNormalizedText, USPOOF_STACK_BUFFER_SIZE, &status); 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (status == U_BUFFER_OVERFLOW_ERROR) { 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_ZERO_ERROR; 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNormalizedText = (UChar *)uprv_malloc((fNormalizedTextLength+1)*sizeof(UChar)); 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNormalizedText == NULL) { 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNormalizedTextLength = unorm_normalize(text, length, UNORM_NFKD, 0, 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNormalizedText, fNormalizedTextLength+1, &status); 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)NFKDBuffer::~NFKDBuffer() { 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fNormalizedText != fSmallBuf) { 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(fNormalizedText); 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNormalizedText = 0; 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)const UChar *NFKDBuffer::getBuffer() { 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fNormalizedText; 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t NFKDBuffer::getLength() { 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fNormalizedTextLength; 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_USE 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------- 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// uspoof_swap - byte swap and char encoding swap of spoof data 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//----------------------------------------------------------------------------- 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status) { 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (status == NULL || U_FAILURE(*status)) { 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status=U_ILLEGAL_ARGUMENT_ERROR; 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check that the data header is for spoof data. 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (Header contents are defined in gencfu.cpp) 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4); 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!( pInfo->dataFormat[0]==0x43 && /* dataFormat="Cfu " */ 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pInfo->dataFormat[1]==0x66 && 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pInfo->dataFormat[2]==0x75 && 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pInfo->dataFormat[3]==0x20 && 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pInfo->formatVersion[0]==1 )) { 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x " 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "(format version %02x %02x %02x %02x) is not recognized\n", 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pInfo->dataFormat[0], pInfo->dataFormat[1], 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pInfo->dataFormat[2], pInfo->dataFormat[3], 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pInfo->formatVersion[0], pInfo->formatVersion[1], 845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pInfo->formatVersion[2], pInfo->formatVersion[3]); 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status=U_UNSUPPORTED_ERROR; 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Swap the data header. (This is the generic ICU Data Header, not the uspoof Specific 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // header). This swap also conveniently gets us 853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the size of the ICU d.h., which lets us locate the start 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // of the uspoof specific data. 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status); 857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Get the Spoof Data Header, and check that it appears to be OK. 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *inBytes =(const uint8_t *)inData+headerSize; 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpoofDataHeader *spoofDH = (SpoofDataHeader *)inBytes; 865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ds->readUInt32(spoofDH->fMagic) != USPOOF_MAGIC || 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ds->readUInt32(spoofDH->fLength) < sizeof(SpoofDataHeader)) 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n"); 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status=U_UNSUPPORTED_ERROR; 870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Prefight operation? Just return the size 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t spoofDataLength = ds->readUInt32(spoofDH->fLength); 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t totalSize = headerSize + spoofDataLength; 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (length < 0) { 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return totalSize; 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check that length passed in is consistent with length from Spoof data header. 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (length < totalSize) { 886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n", 887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) spoofDataLength); 888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status=U_INDEX_OUTOFBOUNDS_ERROR; 889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Swap the Data. Do the data itself first, then the Spoof Data Header, because 895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we need to reference the header to locate the data, and an 896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // inplace swap of the header leaves it unusable. 897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *outBytes = (uint8_t *)outData + headerSize; 899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) SpoofDataHeader *outputDH = (SpoofDataHeader *)outBytes; 900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sectionStart; 902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sectionLength; 903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If not swapping in place, zero out the output buffer before starting. 906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Gaps may exist between the individual sections, and these must be zeroed in 907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the output buffer. The simplest way to do that is to just zero the whole thing. 908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (inBytes != outBytes) { 910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(outBytes, 0, spoofDataLength); 911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Confusables Keys Section (fCFUKeys) 914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionStart = ds->readUInt32(spoofDH->fCFUKeys); 915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4; 916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // String Index Section 919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionStart = ds->readUInt32(spoofDH->fCFUStringIndex); 920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2; 921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // String Table Section 924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionStart = ds->readUInt32(spoofDH->fCFUStringTable); 925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2; 926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // String Lengths Section 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionStart = ds->readUInt32(spoofDH->fCFUStringLengths); 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionLength = ds->readUInt32(spoofDH->fCFUStringLengthsSize) * 4; 931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Any Case Trie 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionStart = ds->readUInt32(spoofDH->fAnyCaseTrie); 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionLength = ds->readUInt32(spoofDH->fAnyCaseTrieLength); 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Lower Case Trie 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionStart = ds->readUInt32(spoofDH->fLowerCaseTrie); 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionLength = ds->readUInt32(spoofDH->fLowerCaseTrieLength); 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Script Sets. The data is an array of int32_t 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionStart = ds->readUInt32(spoofDH->fScriptSets); 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sectionLength = ds->readUInt32(spoofDH->fScriptSetsLength) * sizeof(ScriptSet); 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status); 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // And, last, swap the header itself. 949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // int32_t fMagic // swap this 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // uint8_t fFormatVersion[4] // Do not swap this, just copy 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // int32_t fLength and all the rest // Swap the rest, all is 32 bit stuff. 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t magic = ds->readUInt32(spoofDH->fMagic); 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic); 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion)); 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // swap starting at fLength 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8 /* minus magic and fFormatVersion[4] */, &outputDH->fLength, status); 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return totalSize; 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 965