150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 2009-2011, International Business Machines 550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Corporation and others. All Rights Reserved. 650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* file name: normalizer2impl.cpp 950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* encoding: US-ASCII 1050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* tab size: 8 (not used) 1150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* indentation:4 1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created on: 2009nov22 1450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created by: Markus W. 
Scherer 1550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/ 1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h" 1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/normalizer2.h" 2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/udata.h" 2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h" 2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "cmemory.h" 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "mutex.h" 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h" 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uassert.h" 2827f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uhash.h" 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uset_imp.h" 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "utrie2.h" 3127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uvector.h" 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// ReorderingBuffer -------------------------------------------------------- *** 3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) { 3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=str.length(); 3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho start=str.getBuffer(destCapacity); 4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(start==NULL) { 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // getBuffer() already did str.setToBogus() 
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit=start+length; 4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity=str.getCapacity()-length; 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=start; 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(start==limit) { 4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastCC=0; 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho setIterator(); 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastCC=previousCC(); 5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Set reorderStart after the last code point with cc<=1 if there is one. 5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(lastCC>1) { 5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(previousCC()>1) {} 5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=codePointLimit; 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const { 6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=(int32_t)(limit-start); 6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length==(int32_t)(otherLimit-otherStart) && 6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 0==u_memcmp(start, otherStart, length); 6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
6950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) { 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(remainingCapacity<2 && !resize(2, errorCode)) { 7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(lastCC<=cc || cc==0) { 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit[0]=U16_LEAD(c); 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit[1]=U16_TRAIL(c); 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit+=2; 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastCC=cc; 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cc<=1) { 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=limit; 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho insert(c, cc); 8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity-=2; 8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::append(const UChar *s, int32_t length, 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t leadCC, uint8_t trailCC, 9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) { 9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(length==0) { 9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(remainingCapacity<length && !resize(length, errorCode)) { 9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
} 9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity-=length; 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(lastCC<=leadCC || leadCC==0) { 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(trailCC<=1) { 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=limit+length; 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(leadCC<=1) { 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=limit+1; // Ok if not a code point boundary. 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *sLimit=s+length; 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { *limit++=*s++; } while(s!=sLimit); 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastCC=trailCC; 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i=0; 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(s, i, length, c); 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho insert(c, leadCC); // insert first code point 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(i<length) { 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(s, i, length, c); 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(i<length) { 11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // s must be in NFD, otherwise we need to use getCC(). 
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c)); 11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho leadCC=trailCC; 11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho append(c, leadCC, errorCode); 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 12650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) { 12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t cpLength=U16_LENGTH(c); 12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) { 12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity-=cpLength; 13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cpLength==1) { 13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *limit++=(UChar)c; 13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit[0]=U16_LEAD(c); 13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit[1]=U16_TRAIL(c); 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit+=2; 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastCC=0; 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=limit; 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode) { 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(s==sLimit) { 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=(int32_t)(sLimit-s); 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(remainingCapacity<length && !resize(length, errorCode)) { 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_memcpy(limit, s, length); 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit+=length; 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity-=length; 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastCC=0; 15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=limit; 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::remove() { 16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=limit=start; 16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity=str.getCapacity(); 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastCC=0; 16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 16650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::removeSuffix(int32_t suffixLength) { 16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(suffixLength<(limit-start)) { 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit-=suffixLength; 16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity+=suffixLength; 
17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit=start; 17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity=str.getCapacity(); 17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho lastCC=0; 17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=limit; 17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) { 17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t reorderStartIndex=(int32_t)(reorderStart-start); 18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=(int32_t)(limit-start); 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho str.releaseBuffer(length); 18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newCapacity=length+appendLength; 18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t doubleCapacity=2*str.getCapacity(); 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(newCapacity<doubleCapacity) { 18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newCapacity=doubleCapacity; 18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(newCapacity<256) { 18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newCapacity=256; 18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho start=str.getBuffer(newCapacity); 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(start==NULL) { 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // getBuffer() already did str.setToBogus() 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=start+reorderStartIndex; 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit=start+length; 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho remainingCapacity=str.getCapacity()-length; 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 20250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::skipPrevious() { 20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho codePointLimit=codePointStart; 20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c=*--codePointStart; 20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) { 20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --codePointStart; 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 21050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouint8_t ReorderingBuffer::previousCC() { 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho codePointLimit=codePointStart; 21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(reorderStart>=codePointStart) { 21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c=*--codePointStart; 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(c<Normalizer2Impl::MIN_CCC_LCCC_CP) { 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c2; 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) { 
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --codePointStart; 22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=U16_GET_SUPPLEMENTARY(c2, c); 22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c)); 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Inserts c somewhere before the last character. 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Requires 0<cc<lastCC which implies reorderStart<limit. 23050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::insert(UChar32 c, uint8_t cc) { 23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(setIterator(), skipPrevious(); previousCC()>cc;) {} 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // insert c at codePointLimit, after the character with prevCC<=cc 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *q=limit; 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *r=limit+=U16_LENGTH(c); 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *--r=*--q; 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(codePointLimit!=q); 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho writeCodePoint(q, c); 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cc<=1) { 24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorderStart=r; 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Normalizer2Impl --------------------------------------------------------- *** 24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24627f654740f2a26ad62a5c155af9199af9e69b889clairehostruct CanonIterData : public UMemory { 
24727f654740f2a26ad62a5c155af9199af9e69b889claireho CanonIterData(UErrorCode &errorCode); 24827f654740f2a26ad62a5c155af9199af9e69b889claireho ~CanonIterData(); 24927f654740f2a26ad62a5c155af9199af9e69b889claireho void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode); 25027f654740f2a26ad62a5c155af9199af9e69b889claireho UTrie2 *trie; 25127f654740f2a26ad62a5c155af9199af9e69b889claireho UVector canonStartSets; // contains UnicodeSet * 25227f654740f2a26ad62a5c155af9199af9e69b889claireho}; 25327f654740f2a26ad62a5c155af9199af9e69b889claireho 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::~Normalizer2Impl() { 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_close(memory); 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_close(normTrie); 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTrie2Singleton(fcdTrieSingleton).deleteInstance(); 25827f654740f2a26ad62a5c155af9199af9e69b889claireho delete (CanonIterData *)canonIterDataSingleton.fInstance; 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool U_CALLCONV 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::isAcceptable(void *context, 26327f654740f2a26ad62a5c155af9199af9e69b889claireho const char * /* type */, const char * /*name*/, 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UDataInfo *pInfo) { 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->size>=20 && 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->isBigEndian==U_IS_BIG_ENDIAN && 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->charsetFamily==U_CHARSET_FAMILY && 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[1]==0x72 && 
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[2]==0x6d && 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[3]==0x32 && 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->formatVersion[0]==1 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2Impl *me=(Normalizer2Impl *)context; 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode); 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const int32_t *inIndexes=(const int32_t *)inBytes; 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(indexesLength<=IX_MIN_MAYBE_YES) { 
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP]; 30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP]; 30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho minYesNo=inIndexes[IX_MIN_YES_NO]; 30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho minNoNo=inIndexes[IX_MIN_NO_NO]; 30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limitNoNo=inIndexes[IX_LIMIT_NO_NO]; 30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho minMaybeYes=inIndexes[IX_MIN_MAYBE_YES]; 30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; 31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inBytes+offset, nextOffset-offset, NULL, 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho &errorCode); 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho offset=nextOffset; 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho maybeYesCompositions=(const uint16_t *)(inBytes+offset); 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes); 
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouint8_t Normalizer2Impl::getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const { 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cpStart==(cpLimit-1)) { 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=*cpStart; 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]); 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t prevNorm16=getNorm16(c); 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prevNorm16<=minYesNo) { 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (uint8_t)(*getMapping(prevNorm16)>>8); // tccc from yesNo 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 33950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV 34050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { 34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* add the start code point to the USet */ 34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const USetAdder *sa=(const USetAdder *)context; 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sa->add(sa->set, start); 34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 34727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic uint32_t U_CALLCONV 34827f654740f2a26ad62a5c155af9199af9e69b889clairehosegmentStarterMapper(const void * /*context*/, uint32_t value) { 34927f654740f2a26ad62a5c155af9199af9e69b889claireho return value&CANON_NOT_SEGMENT_STARTER; 35027f654740f2a26ad62a5c155af9199af9e69b889claireho} 35127f654740f2a26ad62a5c155af9199af9e69b889claireho 35250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END 35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 35450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid 35527f654740f2a26ad62a5c155af9199af9e69b889clairehoNormalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const { 35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* add the start code point of each same-value range of each trie */ 35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_enum(normTrie, NULL, enumPropertyStartsRange, sa); 35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* add Hangul LV syllables and LV+1 because of skippables */ 36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) { 36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sa->add(sa->set, c); 36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sa->add(sa->set, c+1); 36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ 36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 36727f654740f2a26ad62a5c155af9199af9e69b889clairehovoid 36827f654740f2a26ad62a5c155af9199af9e69b889clairehoNormalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const { 
36927f654740f2a26ad62a5c155af9199af9e69b889claireho /* add the start code point of each same-value range of the canonical iterator data trie */ 37027f654740f2a26ad62a5c155af9199af9e69b889claireho if(ensureCanonIterData(errorCode)) { 37127f654740f2a26ad62a5c155af9199af9e69b889claireho // currently only used for the SEGMENT_STARTER property 37227f654740f2a26ad62a5c155af9199af9e69b889claireho utrie2_enum(((CanonIterData *)canonIterDataSingleton.fInstance)->trie, 37327f654740f2a26ad62a5c155af9199af9e69b889claireho segmentStarterMapper, enumPropertyStartsRange, sa); 37427f654740f2a26ad62a5c155af9199af9e69b889claireho } 37527f654740f2a26ad62a5c155af9199af9e69b889claireho} 37627f654740f2a26ad62a5c155af9199af9e69b889claireho 37750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar * 37850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src, 37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 minNeedDataCP, 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ReorderingBuffer *buffer, 38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const { 38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Make some effort to support NUL-terminated strings reasonably. 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Take the part of the fast quick check loop that does not look up 38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // data and check the first part of the string. 38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // After this prefix, determine the string length to simplify the rest 38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of the code. 
38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *prevSrc=src; 38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c; 38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((c=*src++)<minNeedDataCP && c!=0) {} 39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back out the last character for full processing. 39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Copy this prefix. 39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(--src!=prevSrc) { 39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(buffer!=NULL) { 39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer->appendZeroCC(prevSrc, src, errorCode); 39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return src; 39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Dual functionality: 40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer!=NULL: normalize 40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer==NULL: isNormalized/spanQuickCheckYes 40350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar * 40450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::decompose(const UChar *src, const UChar *limit, 40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ReorderingBuffer *buffer, 40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const { 40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 minNoCP=minDecompNoCP; 40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(limit==NULL) { 40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode); 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return src; 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit=u_strchr(src, 0); 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *prevSrc; 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c=0; 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16=0; 41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // only for quick check 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *prevBoundary=src; 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t prevCC=0; 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // count code units below the minimum or with irrelevant data for the quick check 42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(prevSrc=src; src!=limit;) { 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (c=*src)<minNoCP || 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c)) 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++src; 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(!U16_IS_SURROGATE(c)) { 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c2; 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U16_IS_SURROGATE_LEAD(c)) { 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=U16_GET_SUPPLEMENTARY(c, c2); 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else /* trail surrogate */ { 
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) { 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --src; 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=U16_GET_SUPPLEMENTARY(c2, c); 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) { 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src+=U16_LENGTH(c); 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // copy these code units all at once 45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(src!=prevSrc) { 45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(buffer!=NULL) { 45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!buffer->appendZeroCC(prevSrc, src, errorCode)) { 45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCC=0; 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevBoundary=src; 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(src==limit) { 46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check one above-minimum, relevant code point. 
46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src+=U16_LENGTH(c); 46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(buffer!=NULL) { 47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!decompose(c, norm16, *buffer, errorCode)) { 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isDecompYes(norm16)) { 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t cc=getCCFromYesOrMaybe(norm16); 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prevCC<=cc || cc==0) { 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCC=cc; 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cc<=1) { 47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevBoundary=src; 48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return prevBoundary; // "no" or cc out of order 48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return src; 48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Decompose a short piece of text which is likely to contain characters that 49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// fail the quick check loop and/or where the quick check loop's overhead 49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// is unlikely to be amortized. 49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Called by the compose() and makeFCD() implementations. 
49450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit, 49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ReorderingBuffer &buffer, 49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const { 49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(src<limit) { 49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 49950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16; 50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16); 50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!decompose(c, norm16, buffer, errorCode)) { 50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 50650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 50850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16, 50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ReorderingBuffer &buffer, 51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const { 51150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Only loops for 1:1 algorithmic mappings. 
51250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 51350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // get the decomposition and the lead and trail cc's 51450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isDecompYes(norm16)) { 51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c does not decompose 51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode); 51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(isHangul(norm16)) { 51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Hangul syllable: decompose algorithmically 51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar jamos[3]; 52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode); 52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(isDecompNoAlgorithmic(norm16)) { 52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=mapAlgorithmic(c, norm16); 52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=getNorm16(c); 52450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c decomposes, get everything from the variable-length extra data 52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint16_t *mapping=getMapping(norm16); 52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t firstUnit=*mapping++; 52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=firstUnit&MAPPING_LENGTH_MASK; 52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t leadCC, trailCC; 53050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho trailCC=(uint8_t)(firstUnit>>8); 53150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { 53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho leadCC=(uint8_t)(*mapping++>>8); 53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho leadCC=0; 
53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return buffer.append((const UChar *)mapping, length, leadCC, trailCC, errorCode); 53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 54150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar * 54250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const { 54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *decomp=NULL; 54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16; 54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) { 54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c does not decompose 54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return decomp; 54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(isHangul(norm16)) { 55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Hangul syllable: decompose algorithmically 55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length=Hangul::decompose(c, buffer); 55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return buffer; 55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(isDecompNoAlgorithmic(norm16)) { 55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=mapAlgorithmic(c, norm16); 55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decomp=buffer; 55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length=0; 55750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_APPEND_UNSAFE(buffer, length, c); 55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 55950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c decomposes, get everything from the variable-length extra data 
56050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint16_t *mapping=getMapping(norm16); 56150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t firstUnit=*mapping++; 56250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length=firstUnit&MAPPING_LENGTH_MASK; 56350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { 56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++mapping; 56550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (const UChar *)mapping; 56750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 57150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit, 57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool doDecompose, 573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString &safeMiddle, 57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ReorderingBuffer &buffer, 57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const { 576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buffer.copyReorderableSuffixTo(safeMiddle); 57750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(doDecompose) { 57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decompose(src, limit, &buffer, errorCode); 57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Just merge the strings at the boundary. 
58250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ForwardUTrie2StringIterator iter(normTrie, src, limit); 58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t firstCC, prevCC, cc; 58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstCC=prevCC=cc=getCC(iter.next16()); 58550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(cc!=0) { 58650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCC=cc; 58750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cc=getCC(iter.next16()); 58850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 589b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(limit==NULL) { // appendZeroCC() needs limit!=NULL 590b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho limit=u_strchr(iter.codePointStart, 0); 591b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode) && 59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.appendZeroCC(iter.codePointStart, limit, errorCode); 59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 59550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Note: hasDecompBoundary() could be implemented as aliases to 59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hasFCDBoundaryBefore() and hasFCDBoundaryAfter() 59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// at the cost of building the FCD trie for a decomposition normalizer. 
59950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasDecompBoundary(UChar32 c, UBool before) const { 60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(c<minDecompNoCP) { 60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16=getNorm16(c); 60550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) { 60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 60750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(norm16>MIN_NORMAL_MAYBE_YES) { 60850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; // ccc!=0 60950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(isDecompNoAlgorithmic(norm16)) { 61050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=mapAlgorithmic(c, norm16); 61150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 61250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c decomposes, get everything from the variable-length extra data 61350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint16_t *mapping=getMapping(norm16); 61450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t firstUnit=*mapping++; 61550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((firstUnit&MAPPING_LENGTH_MASK)==0) { 61650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 61750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 61850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!before) { 61950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // decomp after-boundary: same as hasFCDBoundaryAfter(), 62050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // fcd16<=1 || trailCC==0 62150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(firstUnit>0x1ff) { 62250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; // trailCC>1 
62350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 62450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(firstUnit<=0xff) { 62550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; // trailCC==0 62650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 62750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // if(trailCC==1) test leadCC==0, same as checking for before-boundary 62850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 62950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TRUE if leadCC==0 (hasFCDBoundaryBefore()) 63050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*mapping&0xff00)==0; 63150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 63250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 63350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 63450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 63550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 63650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Finds the recomposition result for 63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a forward-combining "lead" character, 63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * specified with a pointer to its compositions list, 63950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and a backward-combining "trail" character. 64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 64150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the lead and trail characters combine, then this function returns 64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the following "compositeAndFwd" value: 64350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Bits 21..1 composite character 64450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Bit 0 set if the composite is a forward-combining starter 64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * otherwise it returns -1. 
64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The compositions list has (trail, compositeAndFwd) pair entries, 64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * encoded as either pairs or triples of 16-bit units. 64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The last entry has the high bit of its first unit set. 65050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 65150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The list is sorted by ascending trail characters (there are no duplicates). 65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * A linear search is used. 65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * See normalizer2impl.h for a more detailed description 65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * of the compositions list format. 65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 65750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) { 65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t key1, firstUnit; 65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(trail<COMP_1_TRAIL_LIMIT) { 66050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // trail character is 0..33FF 66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // result entry may have 2 or 3 units 66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho key1=(uint16_t)(trail<<1); 66350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(key1>(firstUnit=*list)) { 66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho list+=2+(firstUnit&COMP_1_TRIPLE); 66550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { 66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(firstUnit&COMP_1_TRIPLE) { 66850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ((int32_t)list[1]<<16)|list[2]; 
66950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 67050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return list[1]; 67150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 67350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 67450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // trail character is 3400..10FFFF 67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // result entry has 3 units 67650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho key1=(uint16_t)(COMP_1_TRAIL_LIMIT+ 67727f654740f2a26ad62a5c155af9199af9e69b889claireho (((trail>>COMP_1_TRAIL_SHIFT))& 67827f654740f2a26ad62a5c155af9199af9e69b889claireho ~COMP_1_TRIPLE)); 67950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT); 68050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t secondUnit; 68150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(key1>(firstUnit=*list)) { 68350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho list+=2+(firstUnit&COMP_1_TRIPLE); 68450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { 68550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(key2>(secondUnit=list[1])) { 68650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(firstUnit&COMP_1_LAST_TUPLE) { 68750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 68850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 68950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho list+=3; 69050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 69150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { 69250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2]; 69350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 69450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 
69550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 69650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 69750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 69850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 69950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 70050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 70150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 70250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 70350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 70427f654740f2a26ad62a5c155af9199af9e69b889claireho/** 70527f654740f2a26ad62a5c155af9199af9e69b889claireho * @param list some character's compositions list 70627f654740f2a26ad62a5c155af9199af9e69b889claireho * @param set recursively receives the composites from these compositions 70727f654740f2a26ad62a5c155af9199af9e69b889claireho */ 70827f654740f2a26ad62a5c155af9199af9e69b889clairehovoid Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const { 70927f654740f2a26ad62a5c155af9199af9e69b889claireho uint16_t firstUnit; 71027f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t compositeAndFwd; 71127f654740f2a26ad62a5c155af9199af9e69b889claireho do { 71227f654740f2a26ad62a5c155af9199af9e69b889claireho firstUnit=*list; 71327f654740f2a26ad62a5c155af9199af9e69b889claireho if((firstUnit&COMP_1_TRIPLE)==0) { 71427f654740f2a26ad62a5c155af9199af9e69b889claireho compositeAndFwd=list[1]; 71527f654740f2a26ad62a5c155af9199af9e69b889claireho list+=2; 71627f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 71727f654740f2a26ad62a5c155af9199af9e69b889claireho compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2]; 71827f654740f2a26ad62a5c155af9199af9e69b889claireho list+=3; 71927f654740f2a26ad62a5c155af9199af9e69b889claireho } 72027f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 composite=compositeAndFwd>>1; 72127f654740f2a26ad62a5c155af9199af9e69b889claireho if((compositeAndFwd&1)!=0) { 72227f654740f2a26ad62a5c155af9199af9e69b889claireho 
addComposites(getCompositionsListForComposite(getNorm16(composite)), set); 72327f654740f2a26ad62a5c155af9199af9e69b889claireho } 72427f654740f2a26ad62a5c155af9199af9e69b889claireho set.add(composite); 72527f654740f2a26ad62a5c155af9199af9e69b889claireho } while((firstUnit&COMP_1_LAST_TUPLE)==0); 72627f654740f2a26ad62a5c155af9199af9e69b889claireho} 72727f654740f2a26ad62a5c155af9199af9e69b889claireho 72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 72950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Recomposes the buffer text starting at recomposeStartIndex 73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (which is in NFD - decomposed and canonically ordered), 73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and truncates the buffer contents. 73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 73350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that recomposition never lengthens the text: 73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Any character consists of either one or two code units; 73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a composition may contain at most one more code unit than the original starter, 73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * while the combining mark that is removed has at least one code unit. 
73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 73850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, 73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool onlyContiguous) const { 74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *p=buffer.getStart()+recomposeStartIndex; 74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *limit=buffer.getLimit(); 74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p==limit) { 74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 74450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 74550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *starter, *pRemove, *q, *r; 74750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint16_t *compositionsList; 74850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c, compositeAndFwd; 74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16; 75050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t cc, prevCC; 75150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool starterIsSupplementary; 75250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 75350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Some of the following variables are not used until we have a forward-combining starter 75450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // and are only initialized now to avoid compiler warnings. 
75550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compositionsList=NULL; // used as indicator for whether we have a forward-combining starter 75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starter=NULL; 75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starterIsSupplementary=FALSE; 75850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCC=0; 75950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 76050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 76150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16); 76250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cc=getCCFromYesOrMaybe(norm16); 76350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( // this character combines backward and 76450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMaybe(norm16) && 76550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // we have seen a starter that combines forward and 76650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compositionsList!=NULL && 76750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the backward-combining character is not blocked 76850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (prevCC<cc || prevCC==0) 76950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 77050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isJamoVT(norm16)) { 77150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c is a Jamo V/T, see if we can compose it with the previous character. 77250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(c<Hangul::JAMO_T_BASE) { 77350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. 
77450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE); 77550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prev<Hangul::JAMO_L_COUNT) { 77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pRemove=p-1; 77750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar syllable=(UChar) 77850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (Hangul::HANGUL_BASE+ 77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))* 78050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Hangul::JAMO_T_COUNT); 78150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar t; 78250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) { 78350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++p; 78450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho syllable+=t; // The next character was a Jamo T. 78550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 78650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *starter=syllable; 78750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // remove the Jamo V/T 78850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho q=pRemove; 78950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho r=p; 79050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(r<limit) { 79150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *q++=*r++; 79250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 79350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit=q; 79450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p=pRemove; 79550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 79650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 79750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 79850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * No "else" for Jamo T: 79950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Since the input is in NFD, there are no Hangul LV syllables that 80050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a Jamo T could combine with. 
80150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * All Jamo Ts are combined above when handling Jamo Vs. 80250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 80350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p==limit) { 80450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 80550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 80650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compositionsList=NULL; 80750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 80850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if((compositeAndFwd=combine(compositionsList, c))>=0) { 80950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The starter and the combining mark (c) do combine. 81050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 composite=compositeAndFwd>>1; 81150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 81250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Replace the starter with the composite, remove the combining mark. 81350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark 81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(starterIsSupplementary) { 81550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_IS_SUPPLEMENTARY(composite)) { 81650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // both are supplementary 81750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starter[0]=U16_LEAD(composite); 81850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starter[1]=U16_TRAIL(composite); 81950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 82050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *starter=(UChar)composite; 82150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The composite is shorter than the starter, 82250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // move the intermediate characters forward one. 
82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starterIsSupplementary=FALSE; 82450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho q=starter+1; 82550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho r=q+1; 82650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(r<pRemove) { 82750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *q++=*r++; 82850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 82950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --pRemove; 83050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 83150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(U_IS_SUPPLEMENTARY(composite)) { 83250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The composite is longer than the starter, 83350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // move the intermediate characters back one. 83450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starterIsSupplementary=TRUE; 83550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++starter; // temporarily increment for the loop boundary 83650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho q=pRemove; 83750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho r=++pRemove; 83850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(starter<q) { 83950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *--r=*--q; 84050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 84150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *starter=U16_TRAIL(composite); 84250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *--starter=U16_LEAD(composite); // undo the temporary increment 84350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 84450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // both are on the BMP 84550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *starter=(UChar)composite; 84650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 84750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 84850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* remove the combining mark by moving the following text over it */ 
84950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pRemove<p) { 85050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho q=pRemove; 85150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho r=p; 85250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(r<limit) { 85350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *q++=*r++; 85450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 85550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit=q; 85650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p=pRemove; 85750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 85850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Keep prevCC because we removed the combining mark. 85950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 86050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p==limit) { 86150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 86250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Is the composite a starter that combines forward? 86450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(compositeAndFwd&1) { 86550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compositionsList= 86650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho getCompositionsListForComposite(getNorm16(composite)); 86750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 86850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compositionsList=NULL; 86950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 87050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 87150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We combined; continue with looking for compositions. 
87250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 87350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 87450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 87550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 87650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // no combination this time 87750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCC=cc; 87850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p==limit) { 87950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 88050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 88150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 88250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If c did not combine, then check if it is a starter. 88350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cc==0) { 88450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Found a new starter. 88550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) { 88650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It may combine with something, prepare for it. 88750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_IS_BMP(c)) { 88850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starterIsSupplementary=FALSE; 88950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starter=p-1; 89050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 89150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starterIsSupplementary=TRUE; 89250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starter=p-2; 89350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 89450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 89550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(onlyContiguous) { 89650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // FCC: no discontiguous compositions; any intervening character blocks. 
compositionsList=NULL;
        }
    }
    buffer.setReorderingLimit(limit);
}

// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
// doCompose: normalize
// !doCompose: isNormalized (buffer must be empty and initialized)
//
// Parameters:
//   src, limit      Input range [src..limit[ of UTF-16 code units.
//                   limit==NULL means that src is NUL-terminated: the low prefix is
//                   consumed by copyLowPrefixFromNulTerminated() and the limit is
//                   then located with u_strchr().
//   onlyContiguous  FCC mode: enables the trailing-ccc check on the preceding character
//                   and is passed on to recompose().
//   doCompose       TRUE: the composed text is written to buffer.
//                   FALSE: the input is only tested; buffer is used as scratch space
//                   and emptied again (buffer.remove()) after each slow-path segment.
//   buffer          Destination (or scratch) ReorderingBuffer.
//   errorCode       Passed through to the helper functions.
// Returns:
//   FALSE if the prefix copy for NUL-terminated input fails, or, with !doCompose,
//   as soon as the input is found not to be normalized. TRUE otherwise.
//   NOTE(review): when a buffer append or decomposeShort() returns false, the main
//   loop is left with break and TRUE is still returned; callers that normalize
//   presumably have to check errorCode -- confirm against the callers.
UBool
Normalizer2Impl::compose(const UChar *src, const UChar *limit,
                         UBool onlyContiguous,
                         UBool doCompose,
                         ReorderingBuffer &buffer,
                         UErrorCode &errorCode) const {
    /*
     * prevBoundary points to the last character before the current one
     * that has a composition boundary before it with ccc==0 and quick check "yes".
     * Keeping track of prevBoundary saves us looking for a composition boundary
     * when we find a "no" or "maybe".
     *
     * When we back out from prevSrc back to prevBoundary,
     * then we also remove those same characters (which had been simply copied
     * or canonically-order-inserted) from the ReorderingBuffer.
     * Therefore, at all times, the [prevBoundary..prevSrc[ source units
     * must correspond 1:1 to destination units at the end of the destination buffer.
     */
    const UChar *prevBoundary=src;
    UChar32 minNoMaybeCP=minCompNoMaybeCP;
    if(limit==NULL) {
        // NUL-terminated input: handle the low prefix first (copied only when composing),
        // then turn the rest into a regular [src..limit[ range.
        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
                                           doCompose ? &buffer : NULL,
                                           errorCode);
        if(U_FAILURE(errorCode)) {
            return FALSE;
        }
        if(prevBoundary<src) {
            // Set prevBoundary to the last character in the prefix.
            prevBoundary=src-1;
        }
        limit=u_strchr(src, 0);
    }

    // Start of the current run / of the current character (c).
    const UChar *prevSrc;
    // c and norm16 are set in the quick check loop and used after it.
    UChar32 c=0;
    uint16_t norm16=0;

    // only for isNormalized
    uint8_t prevCC=0;

    for(;;) {
        // count code units below the minimum or with irrelevant data for the quick check
        for(prevSrc=src; src!=limit;) {
            if( (c=*src)<minNoMaybeCP ||
                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
            ) {
                ++src;
            } else if(!U16_IS_SURROGATE(c)) {
                break;
            } else {
                // c is a surrogate code unit: assemble the supplementary code point
                // if its partner is adjacent, otherwise c stays the unpaired surrogate.
                UChar c2;
                if(U16_IS_SURROGATE_LEAD(c)) {
                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
                        c=U16_GET_SUPPLEMENTARY(c, c2);
                    }
                } else /* trail surrogate */ {
                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
                        // Back up so that src points to the start of the code point.
                        --src;
                        c=U16_GET_SUPPLEMENTARY(c2, c);
                    }
                }
                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
                    src+=U16_LENGTH(c);
                } else {
                    break;
                }
            }
        }
        // copy these code units all at once
        if(src!=prevSrc) {
            if(doCompose) {
                if(!buffer.appendZeroCC(prevSrc, src, errorCode)) {
                    break;
                }
            } else {
                prevCC=0;
            }
            if(src==limit) {
                break;
            }
            // Set prevBoundary to the last character in the quick check loop.
            prevBoundary=src-1;
            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
                U16_IS_LEAD(*(prevBoundary-1))
            ) {
                --prevBoundary;
            }
            // The start of the current character (c).
            prevSrc=src;
        } else if(src==limit) {
            break;
        }

        // Step over c; prevSrc still points to its start.
        src+=U16_LENGTH(c);
        /*
         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
         * or has ccc!=0.
         * Check for Jamo V/T, then for regular characters.
         * c is not a Hangul syllable or Jamo L because those have "yes" properties.
         */
        if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
            // The code unit immediately before c in the source.
            UChar prev=*(prevSrc-1);
            UBool needToDecompose=FALSE;
            if(c<Hangul::JAMO_T_BASE) {
                // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                prev=(UChar)(prev-Hangul::JAMO_L_BASE);
                if(prev<Hangul::JAMO_L_COUNT) {
                    if(!doCompose) {
                        return FALSE;
                    }
                    UChar syllable=(UChar)
                        (Hangul::HANGUL_BASE+
                         (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
                         Hangul::JAMO_T_COUNT);
                    UChar t;
                    if(src!=limit && (t=(UChar)(*src-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
                        ++src;
                        syllable+=t;  // The next character was a Jamo T.
                        prevBoundary=src;
                        // The Jamo L at the end of the buffer is replaced by the syllable.
                        buffer.setLastChar(syllable);
                        continue;
                    }
                    // If we see L+V+x where x!=T then we drop to the slow path,
                    // decompose and recompose.
                    // This is to deal with NFKC finding normal L and V but a
                    // compatibility variant of a T. We need to either fully compose that
                    // combination here (which would complicate the code and may not work
                    // with strange custom data) or use the slow path -- or else our replacing
                    // two input characters (L+V) with one output character (LV syllable)
                    // would violate the invariant that [prevBoundary..prevSrc[ has the same
                    // length as what we appended to the buffer since prevBoundary.
                    needToDecompose=TRUE;
                }
            } else if(Hangul::isHangulWithoutJamoT(prev)) {
                // c is a Jamo Trailing consonant,
                // compose with previous Hangul LV that does not contain a Jamo T.
                if(!doCompose) {
                    return FALSE;
                }
                buffer.setLastChar((UChar)(prev+c-Hangul::JAMO_T_BASE));
                prevBoundary=src;
                continue;
            }
            if(!needToDecompose) {
                // The Jamo V/T did not compose into a Hangul syllable.
                if(doCompose) {
                    if(!buffer.appendBMP((UChar)c, 0, errorCode)) {
                        break;
                    }
                } else {
                    prevCC=0;
                }
                continue;
            }
        }
        /*
         * Source buffer pointers:
         *
         *  all done      quick check   current char  not yet
         *                "yes" but     (c)           processed
         *                may combine
         *                forward
         * [-------------[-------------[-------------[-------------[
         * |             |             |             |             |
         * orig. src     prevBoundary  prevSrc       src           limit
         *
         *
         * Destination buffer pointers inside the ReorderingBuffer:
         *
         *  all done      might take    not filled yet
         *                characters for
         *                reordering
         * [-------------[-------------[-------------[
         * |             |             |             |
         * start         reorderStart  limit         |
         *                             +remainingCap.+
         */
        if(norm16>=MIN_YES_YES_WITH_CC) {
            uint8_t cc=(uint8_t)norm16;  // cc!=0
            if( onlyContiguous &&  // FCC
                (doCompose ? buffer.getLastCC() : prevCC)==0 &&
                prevBoundary<prevSrc &&
                // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
                // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
                // passed the quick check "yes && ccc==0" test.
                // Check whether the last character was a "yesYes" or a "yesNo".
                // If a "yesNo", then we get its trailing ccc from its
                // mapping and check for canonical order.
                // All other cases are ok.
                getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
            ) {
                // Fails FCD test, need to decompose and contiguously recompose.
                if(!doCompose) {
                    return FALSE;
                }
            } else if(doCompose) {
                if(!buffer.append(c, cc, errorCode)) {
                    break;
                }
                continue;
            } else if(prevCC<=cc) {
                // isNormalized: combining classes are still in non-decreasing order.
                prevCC=cc;
                continue;
            } else {
                return FALSE;
            }
        } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
            return FALSE;
        }

        /*
         * Find appropriate boundaries around this character,
         * decompose the source text from between the boundaries,
         * and recompose it.
         *
         * We may need to remove the last few characters from the ReorderingBuffer
         * to account for source text that was copied or appended
         * but needs to take part in the recomposition.
         */

        /*
         * Find the last composition boundary in [prevBoundary..src[.
         * It is either the decomposition of the current character (at prevSrc),
         * or prevBoundary.
         */
        if(hasCompBoundaryBefore(c, norm16)) {
            prevBoundary=prevSrc;
        } else if(doCompose) {
            buffer.removeSuffix((int32_t)(prevSrc-prevBoundary));
        }

        // Find the next composition boundary in [src..limit[ -
        // modifies src to point to the next starter.
        src=(UChar *)findNextCompBoundary(src, limit);

        // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
        int32_t recomposeStartIndex=buffer.length();
        if(!decomposeShort(prevBoundary, src, buffer, errorCode)) {
            break;
        }
        recompose(buffer, recomposeStartIndex, onlyContiguous);
        if(!doCompose) {
            // isNormalized: the recomposed text must be identical to the source segment.
            if(!buffer.equals(prevBoundary, src)) {
                return FALSE;
            }
            buffer.remove();
            prevCC=0;
        }

        // Move to the next starter. We never need to look back before this point again.
        prevBoundary=src;
    }
    return TRUE;
}

// Very similar to compose(): Make the same changes in both places if relevant.
// pQCResult==NULL: spanQuickCheckYes
// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES)
//
// Scans [src..limit[ (limit==NULL: NUL-terminated input) without writing any output.
// Returns limit when the scan reaches the end of the input. Otherwise:
// - A character in the "maybe or ccc!=0" branch with norm16<MIN_YES_YES_WITH_CC
//   sets *pQCResult=UNORM_MAYBE and the scan continues, or, with pQCResult==NULL,
//   ends the scan and returns prevBoundary.
// - Any other failure (including a combining-class order or FCC failure)
//   sets *pQCResult=UNORM_NO (if pQCResult!=NULL) and returns prevBoundary.
// onlyContiguous (FCC) enables the trailing-ccc check on the preceding character.
const UChar *
Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
                                   UBool onlyContiguous,
                                   UNormalizationCheckResult *pQCResult) const {
    /*
     * prevBoundary points to the last character before the current one
     * that has a composition boundary before it with ccc==0 and quick check "yes".
     */
    const UChar *prevBoundary=src;
    UChar32 minNoMaybeCP=minCompNoMaybeCP;
    if(limit==NULL) {
        // NUL-terminated input: skip the low prefix, then find the real limit.
        // The local errorCode is not inspected afterwards; with a NULL buffer argument
        // the helper presumably only scans and cannot fail -- TODO confirm.
        UErrorCode errorCode=U_ZERO_ERROR;
        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode);
        if(prevBoundary<src) {
            // Set prevBoundary to the last character in the prefix.
            prevBoundary=src-1;
        }
        limit=u_strchr(src, 0);
    }

    // Start of the current run / of the current character (c).
    const UChar *prevSrc;
    // c and norm16 are set in the quick check loop and used after it.
    UChar32 c=0;
    uint16_t norm16=0;
    // Combining class of the previous character, for the canonical order check.
    uint8_t prevCC=0;

    for(;;) {
        // count code units below the minimum or with irrelevant data for the quick check
        for(prevSrc=src;;) {
            if(src==limit) {
                // Reached the end without finding a "no" character.
                return src;
            }
            if( (c=*src)<minNoMaybeCP ||
                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
            ) {
                ++src;
            } else if(!U16_IS_SURROGATE(c)) {
                break;
            } else {
                // c is a surrogate code unit: assemble the supplementary code point
                // if its partner is adjacent, otherwise c stays the unpaired surrogate.
                UChar c2;
                if(U16_IS_SURROGATE_LEAD(c)) {
                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
                        c=U16_GET_SUPPLEMENTARY(c, c2);
                    }
                } else /* trail surrogate */ {
                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
                        // Back up so that src points to the start of the code point.
                        --src;
                        c=U16_GET_SUPPLEMENTARY(c2, c);
                    }
                }
                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
                    src+=U16_LENGTH(c);
                } else {
                    break;
                }
            }
        }
        if(src!=prevSrc) {
            // Set prevBoundary to the last character in the quick check loop.
            prevBoundary=src-1;
            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
                U16_IS_LEAD(*(prevBoundary-1))
            ) {
                --prevBoundary;
            }
            prevCC=0;
            // The start of the current character (c).
            prevSrc=src;
        }

        // Step over c; prevSrc still points to its start.
        src+=U16_LENGTH(c);
        /*
         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
         * or has ccc!=0.
         */
        if(isMaybeOrNonZeroCC(norm16)) {
            uint8_t cc=getCCFromYesOrMaybe(norm16);
            if( onlyContiguous &&  // FCC
                cc!=0 &&
                prevCC==0 &&
                prevBoundary<prevSrc &&
                // prevCC==0 && prevBoundary<prevSrc tell us that
                // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
                // passed the quick check "yes && ccc==0" test.
                // Check whether the last character was a "yesYes" or a "yesNo".
                // If a "yesNo", then we get its trailing ccc from its
                // mapping and check for canonical order.
                // All other cases are ok.
                getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
            ) {
                // Fails FCD test.
                // Intentionally empty: falls through to the UNORM_NO handling below.
            } else if(prevCC<=cc || cc==0) {
                prevCC=cc;
                if(norm16<MIN_YES_YES_WITH_CC) {
                    if(pQCResult!=NULL) {
                        *pQCResult=UNORM_MAYBE;
                    } else {
                        return prevBoundary;
                    }
                }
                continue;
            }
        }
        if(pQCResult!=NULL) {
            *pQCResult=UNORM_NO;
        }
        return prevBoundary;
    }
}

// Appends [src..limit[ to buffer.
// If buffer is not empty and findNextCompBoundary() does not return src itself,
// then the end of the buffer (from findPreviousCompBoundary()) and the beginning
// of the source are joined in a temporary string and composed together;
// the text removed from the buffer is copied to safeMiddle before that.
// The rest of the source is composed if doCompose, otherwise appended with appendZeroCC().
void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,
                                       UBool doCompose,
                                       UBool onlyContiguous,
                                       UnicodeString &safeMiddle,
                                       ReorderingBuffer &buffer,
                                       UErrorCode &errorCode)
const { 128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!buffer.isEmpty()) { 128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *firstStarterInSrc=findNextCompBoundary(src, limit); 128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(src!=firstStarterInSrc) { 128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(), 128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.getLimit()); 1287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest); 1288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString middle(lastStarterInDest, destSuffixLength); 1289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buffer.removeSuffix(destSuffixLength); 1290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho safeMiddle=middle; 129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho middle.append(src, (int32_t)(firstStarterInSrc-src)); 129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *middleStart=middle.getBuffer(); 129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compose(middleStart, middleStart+middle.length(), onlyContiguous, 129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho TRUE, buffer, errorCode); 129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src=firstStarterInSrc; 129950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(doCompose) { 130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 1304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(limit==NULL) { // 
appendZeroCC() needs limit!=NULL 1305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho limit=u_strchr(src, 0); 1306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.appendZeroCC(src, limit, errorCode); 130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Does c have a composition boundary before it? 131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * True if its decomposition begins with a character that has 131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()). 131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes 131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (isCompYesAndZeroCC()) so we need not decompose. 
131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 131850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const { 131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isCompYesAndZeroCC(norm16)) { 132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(isMaybeOrNonZeroCC(norm16)) { 132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(isDecompNoAlgorithmic(norm16)) { 132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=mapAlgorithmic(c, norm16); 132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=getNorm16(c); 132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c decomposes, get everything from the variable-length extra data 132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint16_t *mapping=getMapping(norm16); 133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t firstUnit=*mapping++; 133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((firstUnit&MAPPING_LENGTH_MASK)==0) { 133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD) && (*mapping++&0xff00)) { 133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; // non-zero leadCC 133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i=0; 133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT_UNSAFE(mapping, i, c); 134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return isCompYesAndZeroCC(getNorm16(c)); 
134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 134550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const { 134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16=getNorm16(c); 134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isInert(norm16)) { 134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(norm16<=minYesNo) { 135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Hangul LVT (==minYesNo) has a boundary after it. 135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Hangul LV and non-inert yesYes characters combine forward. 135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return isHangul(norm16) && !Hangul::isHangulWithoutJamoT((UChar)c); 135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) { 135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(isDecompNoAlgorithmic(norm16)) { 135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=mapAlgorithmic(c, norm16); 135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c decomposes, get everything from the variable-length extra data. 136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If testInert, then c must be a yesNo character which has lccc=0, 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // otherwise it could be a noNo. 
136250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint16_t *mapping=getMapping(norm16); 136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t firstUnit=*mapping; 136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TRUE if 136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c is not deleted, and 136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // it and its decomposition do not combine forward, and it has a starter, and 136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // if FCC then trailCC<=1 136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (firstUnit&MAPPING_LENGTH_MASK)!=0 && 137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (firstUnit&(MAPPING_PLUS_COMPOSITION_LIST|MAPPING_NO_COMP_BOUNDARY_AFTER))==0 && 137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (!onlyContiguous || firstUnit<=0x1ff); 137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 137650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p) const { 137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho BackwardUTrie2StringIterator iter(normTrie, start, p); 137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16; 137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=iter.previous16(); 138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(!hasCompBoundaryBefore(iter.codePoint, norm16)); 138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We could also test hasCompBoundaryAfter() and return iter.codePointLimit, 138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // but that's probably not worth the extra cost. 
138450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return iter.codePointStart; 138550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 138650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 138750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit) const { 138850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ForwardUTrie2StringIterator iter(normTrie, p, limit); 138950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16; 139050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 139150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=iter.next16(); 139250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(!hasCompBoundaryBefore(iter.codePoint, norm16)); 139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return iter.codePointStart; 139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 139650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass FCDTrieSingleton : public UTrie2Singleton { 139750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FCDTrieSingleton(SimpleSingleton &s, Normalizer2Impl &ni, UErrorCode &ec) : 139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTrie2Singleton(s), impl(ni), errorCode(ec) {} 140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTrie2 *getInstance(UErrorCode &errorCode) { 140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UTrie2Singleton::getInstance(createInstance, this, errorCode); 140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static void *createInstance(const void *context, UErrorCode &errorCode); 140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { 140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(value!=0) { 140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
impl.setFCD16FromNorm16(start, end, (uint16_t)value, newFCDTrie, errorCode); 140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return U_SUCCESS(errorCode); 140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2Impl &impl; 141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTrie2 *newFCDTrie; 141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode; 141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 141650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN 141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Set the FCD value for a range of same-norm16 characters. 141950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV 142050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) { 142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ((FCDTrieSingleton *)context)->rangeHandler(start, end, value); 142250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Collect (OR together) the FCD values for a range of supplementary characters, 142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// for their lead surrogate code unit. 
142650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV 142727f654740f2a26ad62a5c155af9199af9e69b889clairehoenumRangeOrValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) { 142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *((uint32_t *)context)|=value; 142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 143250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END 143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 143450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid *FCDTrieSingleton::createInstance(const void *context, UErrorCode &errorCode) { 143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FCDTrieSingleton *me=(FCDTrieSingleton *)context; 143650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho me->newFCDTrie=utrie2_open(0, 0, &errorCode); 143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_SUCCESS(errorCode)) { 143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_enum(me->impl.getNormTrie(), NULL, enumRangeHandler, me); 143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(UChar lead=0xd800; lead<0xdc00; ++lead) { 144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t oredValue=utrie2_get32(me->newFCDTrie, lead); 144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_enumForLeadSurrogate(me->newFCDTrie, lead, NULL, enumRangeOrValue, &oredValue); 144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(oredValue!=0) { 144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Set a "bad" value for makeFCD() to break the quick check loop 144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // and look up the value for the supplementary code point. 144550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If there is any lccc, then set the worst-case lccc of 1. 
144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The ORed-together value's tccc is already the worst case. 144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(oredValue>0xff) { 144850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho oredValue=0x100|(oredValue&0xff); 144950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 145050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_set32ForLeadSurrogateCodeUnit(me->newFCDTrie, lead, oredValue, &errorCode); 145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_freeze(me->newFCDTrie, UTRIE2_16_VALUE_BITS, &errorCode); 145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_SUCCESS(errorCode)) { 145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return me->newFCDTrie; 145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_close(me->newFCDTrie); 145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 146250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16, 146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTrie2 *newFCDTrie, UErrorCode &errorCode) const { 146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Only loops for 1:1 algorithmic mappings. 
146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norm16>=MIN_NORMAL_MAYBE_YES) { 146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16&=0xff; 146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16|=norm16<<8; 146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(norm16<=minYesNo || minMaybeYes<=norm16) { 147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // no decomposition or Hangul syllable, all zeros 147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(limitNoNo<=norm16) { 147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t delta=norm16-(minMaybeYes-MAX_DELTA-1); 147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(start==end) { 147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho start+=delta; 147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=getNorm16(start); 147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the same delta leads from different original characters to different mappings 147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c=start+delta; 148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho setFCD16FromNorm16(c, c, getNorm16(c), newFCDTrie, errorCode); 148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(++start<=end); 148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c decomposes, get everything from the variable-length extra data 148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint16_t *mapping=getMapping(norm16); 148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t firstUnit=*mapping; 
148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((firstUnit&MAPPING_LENGTH_MASK)==0) { 149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // A character that is deleted (maps to an empty string) must 149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // get the worst-case lccc and tccc values because arbitrary 149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // characters on both sides will become adjacent. 149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=0x1ff; 149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { 149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=mapping[1]&0xff00; // lccc 149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=0; 149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16|=firstUnit>>8; // tccc 150150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 150250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_setRange32(newFCDTrie, start, end, norm16, TRUE, &errorCode); 150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 150850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UTrie2 *Normalizer2Impl::getFCDTrie(UErrorCode &errorCode) const { 150950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Logically const: Synchronized instantiation. 
151050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this); 151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FCDTrieSingleton(me->fcdTrieSingleton, *me, errorCode).getInstance(errorCode); 151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Dual functionality: 151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer!=NULL: normalize 151650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes 151750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar * 151850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::makeFCD(const UChar *src, const UChar *limit, 151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ReorderingBuffer *buffer, 152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const { 152127f654740f2a26ad62a5c155af9199af9e69b889claireho // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. 152227f654740f2a26ad62a5c155af9199af9e69b889claireho // Similar to the prevBoundary in the compose() implementation. 
152327f654740f2a26ad62a5c155af9199af9e69b889claireho const UChar *prevBoundary=src; 152427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t prevFCD16=0; 152550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(limit==NULL) { 152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src=copyLowPrefixFromNulTerminated(src, MIN_CCC_LCCC_CP, buffer, errorCode); 152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return src; 152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 153027f654740f2a26ad62a5c155af9199af9e69b889claireho if(prevBoundary<src) { 153127f654740f2a26ad62a5c155af9199af9e69b889claireho prevBoundary=src; 153227f654740f2a26ad62a5c155af9199af9e69b889claireho // We know that the previous character's lccc==0. 153327f654740f2a26ad62a5c155af9199af9e69b889claireho // Fetching the fcd16 value was deferred for this below-U+0300 code point. 153427f654740f2a26ad62a5c155af9199af9e69b889claireho prevFCD16=getFCD16FromSingleLead(*(src-1)); 153527f654740f2a26ad62a5c155af9199af9e69b889claireho if(prevFCD16>1) { 153627f654740f2a26ad62a5c155af9199af9e69b889claireho --prevBoundary; 153727f654740f2a26ad62a5c155af9199af9e69b889claireho } 153827f654740f2a26ad62a5c155af9199af9e69b889claireho } 153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho limit=u_strchr(src, 0); 154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note: In this function we use buffer->appendZeroCC() because we track 154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the lead and trail combining classes here, rather than leaving it to 154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the ReorderingBuffer. 154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The exception is the call to decomposeShort() which uses the buffer 154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in the normal way. 
154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UTrie2 *trie=fcdTrie(); 154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *prevSrc; 155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c=0; 155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t fcd16=0; 155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // count code units with lccc==0 155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(prevSrc=src; src!=limit;) { 155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((c=*src)<MIN_CCC_LCCC_CP) { 155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevFCD16=~c; 155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++src; 156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if((fcd16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c))<=0xff) { 156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevFCD16=fcd16; 156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++src; 156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(!U16_IS_SURROGATE(c)) { 156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c2; 156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U16_IS_SURROGATE_LEAD(c)) { 156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { 156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=U16_GET_SUPPLEMENTARY(c, c2); 157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else /* trail surrogate */ { 157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) { 157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --src; 
157450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c=U16_GET_SUPPLEMENTARY(c2, c); 157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((fcd16=getFCD16(c))<=0xff) { 157850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevFCD16=fcd16; 157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src+=U16_LENGTH(c); 158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 158150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // copy these code units all at once 158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(src!=prevSrc) { 158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) { 158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(src==limit) { 159150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 159250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevBoundary=src; 159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We know that the previous character's lccc==0. 159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prevFCD16<0) { 159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetching the fcd16 value was deferred for this below-U+0300 code point. 
159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevFCD16=getFCD16FromSingleLead((UChar)~prevFCD16); 159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prevFCD16>1) { 159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --prevBoundary; 160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 160150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *p=src-1; 160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) { 160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --p; 160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Need to fetch the previous character's FCD value because 160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // prevFCD16 was just for the trail surrogate code point. 160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevFCD16=getFCD16FromSurrogatePair(p[0], p[1]); 160850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Still known to have lccc==0 because its lead surrogate unit had lccc==0. 160950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prevFCD16>1) { 161150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevBoundary=p; 161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The start of the current character (c). 161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevSrc=src; 161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(src==limit) { 161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src+=U16_LENGTH(c); 162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. 
162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check for proper order, and decompose locally if necessary. 162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((prevFCD16&0xff)<=(fcd16>>8)) { 162450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // proper order: prev tccc <= current lccc 162550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((fcd16&0xff)<=1) { 162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevBoundary=src; 162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) { 162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevFCD16=fcd16; 163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(buffer==NULL) { 163450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return prevBoundary; // quick check "no" 163550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Back out the part of the source that we copied or appended 163850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * already but is now going to be decomposed. 163950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * prevSrc is set to after what was copied/appended. 164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer->removeSuffix((int32_t)(prevSrc-prevBoundary)); 164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Find the part of the source that needs to be decomposed, 164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * up to the next safe boundary. 
164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src=findNextFCDBoundary(src, limit); 164750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The source text does not fulfill the conditions for FCD. 164950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Decompose and reorder a limited piece of the text. 165050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 165150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!decomposeShort(prevBoundary, src, *buffer, errorCode)) { 165250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 165350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 165450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevBoundary=src; 165550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevFCD16=0; 165650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 165750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 165850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return src; 165950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 166050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 166150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit, 166250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool doMakeFCD, 1663b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString &safeMiddle, 166450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ReorderingBuffer &buffer, 166550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const { 166650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!buffer.isEmpty()) { 166750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit); 166850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(src!=firstBoundaryInSrc) { 166950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(), 
167050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.getLimit()); 1671b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest); 1672b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString middle(lastBoundaryInDest, destSuffixLength); 1673b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buffer.removeSuffix(destSuffixLength); 1674b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho safeMiddle=middle; 167550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho middle.append(src, (int32_t)(firstBoundaryInSrc-src)); 167650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *middleStart=middle.getBuffer(); 167750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode); 167850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 167950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 168050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 168150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src=firstBoundaryInSrc; 168250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 168350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 168450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(doMakeFCD) { 168550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho makeFCD(src, limit, &buffer, errorCode); 168650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 1687b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(limit==NULL) { // appendZeroCC() needs limit!=NULL 1688b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho limit=u_strchr(src, 0); 1689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 169050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.appendZeroCC(src, limit, errorCode); 169150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 169250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 169350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 169450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar 
*Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const { 169550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho BackwardUTrie2StringIterator iter(fcdTrie(), start, p); 169650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t fcd16; 169750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fcd16=iter.previous16(); 169950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(fcd16>0xff); 170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return iter.codePointStart; 170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 170350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const { 170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ForwardUTrie2StringIterator iter(fcdTrie(), p, limit); 170550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t fcd16; 170650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 170750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fcd16=iter.next16(); 170850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(fcd16>0xff); 170950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return iter.codePointStart; 171050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 171150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 171227f654740f2a26ad62a5c155af9199af9e69b889claireho// CanonicalIterator data -------------------------------------------------- *** 171327f654740f2a26ad62a5c155af9199af9e69b889claireho 171427f654740f2a26ad62a5c155af9199af9e69b889clairehoCanonIterData::CanonIterData(UErrorCode &errorCode) : 171527f654740f2a26ad62a5c155af9199af9e69b889claireho trie(utrie2_open(0, 0, &errorCode)), 171627f654740f2a26ad62a5c155af9199af9e69b889claireho canonStartSets(uhash_deleteUObject, NULL, errorCode) {} 171727f654740f2a26ad62a5c155af9199af9e69b889claireho 
171827f654740f2a26ad62a5c155af9199af9e69b889clairehoCanonIterData::~CanonIterData() { 171927f654740f2a26ad62a5c155af9199af9e69b889claireho utrie2_close(trie); 172027f654740f2a26ad62a5c155af9199af9e69b889claireho} 172127f654740f2a26ad62a5c155af9199af9e69b889claireho 172227f654740f2a26ad62a5c155af9199af9e69b889clairehovoid CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) { 172327f654740f2a26ad62a5c155af9199af9e69b889claireho uint32_t canonValue=utrie2_get32(trie, decompLead); 172427f654740f2a26ad62a5c155af9199af9e69b889claireho if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) { 172527f654740f2a26ad62a5c155af9199af9e69b889claireho // origin is the first character whose decomposition starts with 172627f654740f2a26ad62a5c155af9199af9e69b889claireho // the character for which we are setting the value. 172727f654740f2a26ad62a5c155af9199af9e69b889claireho utrie2_set32(trie, decompLead, canonValue|origin, &errorCode); 172827f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 172927f654740f2a26ad62a5c155af9199af9e69b889claireho // origin is not the first character, or it is U+0000. 
173027f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeSet *set; 173127f654740f2a26ad62a5c155af9199af9e69b889claireho if((canonValue&CANON_HAS_SET)==0) { 173227f654740f2a26ad62a5c155af9199af9e69b889claireho set=new UnicodeSet; 173327f654740f2a26ad62a5c155af9199af9e69b889claireho if(set==NULL) { 173427f654740f2a26ad62a5c155af9199af9e69b889claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 173527f654740f2a26ad62a5c155af9199af9e69b889claireho return; 173627f654740f2a26ad62a5c155af9199af9e69b889claireho } 173727f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK); 173827f654740f2a26ad62a5c155af9199af9e69b889claireho canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size(); 173927f654740f2a26ad62a5c155af9199af9e69b889claireho utrie2_set32(trie, decompLead, canonValue, &errorCode); 174027f654740f2a26ad62a5c155af9199af9e69b889claireho canonStartSets.addElement(set, errorCode); 174127f654740f2a26ad62a5c155af9199af9e69b889claireho if(firstOrigin!=0) { 174227f654740f2a26ad62a5c155af9199af9e69b889claireho set->add(firstOrigin); 174327f654740f2a26ad62a5c155af9199af9e69b889claireho } 174427f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 174527f654740f2a26ad62a5c155af9199af9e69b889claireho set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)]; 174627f654740f2a26ad62a5c155af9199af9e69b889claireho } 174727f654740f2a26ad62a5c155af9199af9e69b889claireho set->add(origin); 174827f654740f2a26ad62a5c155af9199af9e69b889claireho } 174927f654740f2a26ad62a5c155af9199af9e69b889claireho} 175027f654740f2a26ad62a5c155af9199af9e69b889claireho 175127f654740f2a26ad62a5c155af9199af9e69b889clairehoclass CanonIterDataSingleton { 175227f654740f2a26ad62a5c155af9199af9e69b889clairehopublic: 175327f654740f2a26ad62a5c155af9199af9e69b889claireho CanonIterDataSingleton(SimpleSingleton &s, Normalizer2Impl &ni, UErrorCode &ec) : 175427f654740f2a26ad62a5c155af9199af9e69b889claireho singleton(s), 
impl(ni), errorCode(ec) {} 175527f654740f2a26ad62a5c155af9199af9e69b889claireho CanonIterData *getInstance(UErrorCode &errorCode) { 175627f654740f2a26ad62a5c155af9199af9e69b889claireho void *duplicate; 175727f654740f2a26ad62a5c155af9199af9e69b889claireho CanonIterData *instance= 175827f654740f2a26ad62a5c155af9199af9e69b889claireho (CanonIterData *)singleton.getInstance(createInstance, this, duplicate, errorCode); 175927f654740f2a26ad62a5c155af9199af9e69b889claireho delete (CanonIterData *)duplicate; 176027f654740f2a26ad62a5c155af9199af9e69b889claireho return instance; 176127f654740f2a26ad62a5c155af9199af9e69b889claireho } 176227f654740f2a26ad62a5c155af9199af9e69b889claireho static void *createInstance(const void *context, UErrorCode &errorCode); 176327f654740f2a26ad62a5c155af9199af9e69b889claireho UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { 176427f654740f2a26ad62a5c155af9199af9e69b889claireho if(value!=0) { 176527f654740f2a26ad62a5c155af9199af9e69b889claireho impl.makeCanonIterDataFromNorm16(start, end, (uint16_t)value, *newData, errorCode); 176627f654740f2a26ad62a5c155af9199af9e69b889claireho } 176727f654740f2a26ad62a5c155af9199af9e69b889claireho return U_SUCCESS(errorCode); 176827f654740f2a26ad62a5c155af9199af9e69b889claireho } 176927f654740f2a26ad62a5c155af9199af9e69b889claireho 177027f654740f2a26ad62a5c155af9199af9e69b889clairehoprivate: 177127f654740f2a26ad62a5c155af9199af9e69b889claireho SimpleSingleton &singleton; 177227f654740f2a26ad62a5c155af9199af9e69b889claireho Normalizer2Impl &impl; 177327f654740f2a26ad62a5c155af9199af9e69b889claireho CanonIterData *newData; 177427f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode &errorCode; 177527f654740f2a26ad62a5c155af9199af9e69b889claireho}; 177627f654740f2a26ad62a5c155af9199af9e69b889claireho 177727f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_BEGIN 177827f654740f2a26ad62a5c155af9199af9e69b889claireho 177927f654740f2a26ad62a5c155af9199af9e69b889claireho// Call 
Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. 178027f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool U_CALLCONV 178127f654740f2a26ad62a5c155af9199af9e69b889clairehoenumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) { 178227f654740f2a26ad62a5c155af9199af9e69b889claireho return ((CanonIterDataSingleton *)context)->rangeHandler(start, end, value); 178327f654740f2a26ad62a5c155af9199af9e69b889claireho} 178427f654740f2a26ad62a5c155af9199af9e69b889claireho 178527f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_END 178627f654740f2a26ad62a5c155af9199af9e69b889claireho 178727f654740f2a26ad62a5c155af9199af9e69b889clairehovoid *CanonIterDataSingleton::createInstance(const void *context, UErrorCode &errorCode) { 178827f654740f2a26ad62a5c155af9199af9e69b889claireho CanonIterDataSingleton *me=(CanonIterDataSingleton *)context; 178927f654740f2a26ad62a5c155af9199af9e69b889claireho me->newData=new CanonIterData(errorCode); 179027f654740f2a26ad62a5c155af9199af9e69b889claireho if(me->newData==NULL) { 179127f654740f2a26ad62a5c155af9199af9e69b889claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 179227f654740f2a26ad62a5c155af9199af9e69b889claireho return NULL; 179327f654740f2a26ad62a5c155af9199af9e69b889claireho } 179427f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_SUCCESS(errorCode)) { 179527f654740f2a26ad62a5c155af9199af9e69b889claireho utrie2_enum(me->impl.getNormTrie(), NULL, enumCIDRangeHandler, me); 179627f654740f2a26ad62a5c155af9199af9e69b889claireho utrie2_freeze(me->newData->trie, UTRIE2_32_VALUE_BITS, &errorCode); 179727f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_SUCCESS(errorCode)) { 179827f654740f2a26ad62a5c155af9199af9e69b889claireho return me->newData; 179927f654740f2a26ad62a5c155af9199af9e69b889claireho } 180027f654740f2a26ad62a5c155af9199af9e69b889claireho } 180127f654740f2a26ad62a5c155af9199af9e69b889claireho delete me->newData; 
180227f654740f2a26ad62a5c155af9199af9e69b889claireho return NULL; 180327f654740f2a26ad62a5c155af9199af9e69b889claireho} 180427f654740f2a26ad62a5c155af9199af9e69b889claireho 180527f654740f2a26ad62a5c155af9199af9e69b889clairehovoid Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16, 180627f654740f2a26ad62a5c155af9199af9e69b889claireho CanonIterData &newData, 180727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode &errorCode) const { 180827f654740f2a26ad62a5c155af9199af9e69b889claireho if(norm16==0 || (minYesNo<=norm16 && norm16<minNoNo)) { 180927f654740f2a26ad62a5c155af9199af9e69b889claireho // Inert, or 2-way mapping (including Hangul syllable). 181027f654740f2a26ad62a5c155af9199af9e69b889claireho // We do not write a canonStartSet for any yesNo character. 181127f654740f2a26ad62a5c155af9199af9e69b889claireho // Composites from 2-way mappings are added at runtime from the 181227f654740f2a26ad62a5c155af9199af9e69b889claireho // starter's compositions list, and the other characters in 181327f654740f2a26ad62a5c155af9199af9e69b889claireho // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are 181427f654740f2a26ad62a5c155af9199af9e69b889claireho // "maybe" characters. 
181527f654740f2a26ad62a5c155af9199af9e69b889claireho return; 181627f654740f2a26ad62a5c155af9199af9e69b889claireho } 181727f654740f2a26ad62a5c155af9199af9e69b889claireho for(UChar32 c=start; c<=end; ++c) { 181827f654740f2a26ad62a5c155af9199af9e69b889claireho uint32_t oldValue=utrie2_get32(newData.trie, c); 181927f654740f2a26ad62a5c155af9199af9e69b889claireho uint32_t newValue=oldValue; 182027f654740f2a26ad62a5c155af9199af9e69b889claireho if(norm16>=minMaybeYes) { 182127f654740f2a26ad62a5c155af9199af9e69b889claireho // not a segment starter if it occurs in a decomposition or has cc!=0 182227f654740f2a26ad62a5c155af9199af9e69b889claireho newValue|=CANON_NOT_SEGMENT_STARTER; 182327f654740f2a26ad62a5c155af9199af9e69b889claireho if(norm16<MIN_NORMAL_MAYBE_YES) { 182427f654740f2a26ad62a5c155af9199af9e69b889claireho newValue|=CANON_HAS_COMPOSITIONS; 182527f654740f2a26ad62a5c155af9199af9e69b889claireho } 182627f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(norm16<minYesNo) { 182727f654740f2a26ad62a5c155af9199af9e69b889claireho newValue|=CANON_HAS_COMPOSITIONS; 182827f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 182927f654740f2a26ad62a5c155af9199af9e69b889claireho // c has a one-way decomposition 183027f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c2=c; 183127f654740f2a26ad62a5c155af9199af9e69b889claireho uint16_t norm16_2=norm16; 183227f654740f2a26ad62a5c155af9199af9e69b889claireho while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) { 183327f654740f2a26ad62a5c155af9199af9e69b889claireho c2=mapAlgorithmic(c2, norm16_2); 183427f654740f2a26ad62a5c155af9199af9e69b889claireho norm16_2=getNorm16(c2); 183527f654740f2a26ad62a5c155af9199af9e69b889claireho } 183627f654740f2a26ad62a5c155af9199af9e69b889claireho if(minYesNo<=norm16_2 && norm16_2<limitNoNo) { 183727f654740f2a26ad62a5c155af9199af9e69b889claireho // c decomposes, get everything from the variable-length extra data 183827f654740f2a26ad62a5c155af9199af9e69b889claireho const uint16_t 
*mapping=getMapping(norm16_2); 183927f654740f2a26ad62a5c155af9199af9e69b889claireho uint16_t firstUnit=*mapping++; 184027f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t length=firstUnit&MAPPING_LENGTH_MASK; 184127f654740f2a26ad62a5c155af9199af9e69b889claireho if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { 184227f654740f2a26ad62a5c155af9199af9e69b889claireho if(c==c2 && (*mapping&0xff)!=0) { 184327f654740f2a26ad62a5c155af9199af9e69b889claireho newValue|=CANON_NOT_SEGMENT_STARTER; // original c has cc!=0 184427f654740f2a26ad62a5c155af9199af9e69b889claireho } 184527f654740f2a26ad62a5c155af9199af9e69b889claireho ++mapping; 184627f654740f2a26ad62a5c155af9199af9e69b889claireho } 184727f654740f2a26ad62a5c155af9199af9e69b889claireho // Skip empty mappings (no characters in the decomposition). 184827f654740f2a26ad62a5c155af9199af9e69b889claireho if(length!=0) { 184927f654740f2a26ad62a5c155af9199af9e69b889claireho // add c to first code point's start set 185027f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t i=0; 185127f654740f2a26ad62a5c155af9199af9e69b889claireho U16_NEXT_UNSAFE(mapping, i, c2); 185227f654740f2a26ad62a5c155af9199af9e69b889claireho newData.addToStartSet(c, c2, errorCode); 185327f654740f2a26ad62a5c155af9199af9e69b889claireho // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a 185427f654740f2a26ad62a5c155af9199af9e69b889claireho // one-way mapping. A 2-way mapping is possible here after 185527f654740f2a26ad62a5c155af9199af9e69b889claireho // intermediate algorithmic mapping. 
185627f654740f2a26ad62a5c155af9199af9e69b889claireho if(norm16_2>=minNoNo) { 185727f654740f2a26ad62a5c155af9199af9e69b889claireho while(i<length) { 185827f654740f2a26ad62a5c155af9199af9e69b889claireho U16_NEXT_UNSAFE(mapping, i, c2); 185927f654740f2a26ad62a5c155af9199af9e69b889claireho uint32_t c2Value=utrie2_get32(newData.trie, c2); 186027f654740f2a26ad62a5c155af9199af9e69b889claireho if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) { 186127f654740f2a26ad62a5c155af9199af9e69b889claireho utrie2_set32(newData.trie, c2, c2Value|CANON_NOT_SEGMENT_STARTER, 186227f654740f2a26ad62a5c155af9199af9e69b889claireho &errorCode); 186327f654740f2a26ad62a5c155af9199af9e69b889claireho } 186427f654740f2a26ad62a5c155af9199af9e69b889claireho } 186527f654740f2a26ad62a5c155af9199af9e69b889claireho } 186627f654740f2a26ad62a5c155af9199af9e69b889claireho } 186727f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 186827f654740f2a26ad62a5c155af9199af9e69b889claireho // c decomposed to c2 algorithmically; c has cc==0 186927f654740f2a26ad62a5c155af9199af9e69b889claireho newData.addToStartSet(c, c2, errorCode); 187027f654740f2a26ad62a5c155af9199af9e69b889claireho } 187127f654740f2a26ad62a5c155af9199af9e69b889claireho } 187227f654740f2a26ad62a5c155af9199af9e69b889claireho if(newValue!=oldValue) { 187327f654740f2a26ad62a5c155af9199af9e69b889claireho utrie2_set32(newData.trie, c, newValue, &errorCode); 187427f654740f2a26ad62a5c155af9199af9e69b889claireho } 187527f654740f2a26ad62a5c155af9199af9e69b889claireho } 187627f654740f2a26ad62a5c155af9199af9e69b889claireho} 187727f654740f2a26ad62a5c155af9199af9e69b889claireho 187827f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const { 187927f654740f2a26ad62a5c155af9199af9e69b889claireho // Logically const: Synchronized instantiation. 
188027f654740f2a26ad62a5c155af9199af9e69b889claireho Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this); 188127f654740f2a26ad62a5c155af9199af9e69b889claireho CanonIterDataSingleton(me->canonIterDataSingleton, *me, errorCode).getInstance(errorCode); 188227f654740f2a26ad62a5c155af9199af9e69b889claireho return U_SUCCESS(errorCode); 188327f654740f2a26ad62a5c155af9199af9e69b889claireho} 188427f654740f2a26ad62a5c155af9199af9e69b889claireho 188527f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t Normalizer2Impl::getCanonValue(UChar32 c) const { 188627f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)utrie2_get32(((CanonIterData *)canonIterDataSingleton.fInstance)->trie, c); 188727f654740f2a26ad62a5c155af9199af9e69b889claireho} 188827f654740f2a26ad62a5c155af9199af9e69b889claireho 188927f654740f2a26ad62a5c155af9199af9e69b889clairehoconst UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const { 189027f654740f2a26ad62a5c155af9199af9e69b889claireho return *(const UnicodeSet *)( 189127f654740f2a26ad62a5c155af9199af9e69b889claireho ((CanonIterData *)canonIterDataSingleton.fInstance)->canonStartSets[n]); 189227f654740f2a26ad62a5c155af9199af9e69b889claireho} 189327f654740f2a26ad62a5c155af9199af9e69b889claireho 189427f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const { 189527f654740f2a26ad62a5c155af9199af9e69b889claireho return getCanonValue(c)>=0; 189627f654740f2a26ad62a5c155af9199af9e69b889claireho} 189727f654740f2a26ad62a5c155af9199af9e69b889claireho 189827f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const { 189927f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER; 190027f654740f2a26ad62a5c155af9199af9e69b889claireho if(canonValue==0) { 190127f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 190227f654740f2a26ad62a5c155af9199af9e69b889claireho } 
190327f654740f2a26ad62a5c155af9199af9e69b889claireho set.clear(); 190427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t value=canonValue&CANON_VALUE_MASK; 190527f654740f2a26ad62a5c155af9199af9e69b889claireho if((canonValue&CANON_HAS_SET)!=0) { 190627f654740f2a26ad62a5c155af9199af9e69b889claireho set.addAll(getCanonStartSet(value)); 190727f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(value!=0) { 190827f654740f2a26ad62a5c155af9199af9e69b889claireho set.add(value); 190927f654740f2a26ad62a5c155af9199af9e69b889claireho } 191027f654740f2a26ad62a5c155af9199af9e69b889claireho if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { 191127f654740f2a26ad62a5c155af9199af9e69b889claireho uint16_t norm16=getNorm16(c); 191227f654740f2a26ad62a5c155af9199af9e69b889claireho if(norm16==JAMO_L) { 191327f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 syllable= 191427f654740f2a26ad62a5c155af9199af9e69b889claireho (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT); 191527f654740f2a26ad62a5c155af9199af9e69b889claireho set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1); 191627f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 191727f654740f2a26ad62a5c155af9199af9e69b889claireho addComposites(getCompositionsList(norm16), set); 191827f654740f2a26ad62a5c155af9199af9e69b889claireho } 191927f654740f2a26ad62a5c155af9199af9e69b889claireho } 192027f654740f2a26ad62a5c155af9199af9e69b889claireho return TRUE; 192127f654740f2a26ad62a5c155af9199af9e69b889claireho} 192227f654740f2a26ad62a5c155af9199af9e69b889claireho 192350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 192450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 192550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Normalizer2 data swapping ----------------------------------------------- *** 192650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 192750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_USE 192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
192950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI int32_t U_EXPORT2 193050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_swap(const UDataSwapper *ds, 193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const void *inData, int32_t length, void *outData, 193250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode) { 193350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UDataInfo *pInfo; 193450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t headerSize; 193550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint8_t *inBytes; 193750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t *outBytes; 193850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 193950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const int32_t *inIndexes; 194050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t indexes[Normalizer2Impl::IX_MIN_MAYBE_YES+1]; 194150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i, offset, nextOffset, size; 194350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* udata_swapDataHeader checks the arguments */ 194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 194950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* check data format and format version */ 195150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo=(const UDataInfo *)((const char *)inData+4); 195250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!( 195350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" 
*/ 195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[1]==0x72 && 195550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[2]==0x6d && 195650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[3]==0x32 && 195750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->formatVersion[0]==1 195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho )) { 195950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n", 196050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[0], pInfo->dataFormat[1], 196150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->dataFormat[2], pInfo->dataFormat[3], 196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pInfo->formatVersion[0]); 196350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode=U_UNSUPPORTED_ERROR; 196450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 196550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 196650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 196750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inBytes=(const uint8_t *)inData+headerSize; 196850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho outBytes=(uint8_t *)outData+headerSize; 196950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 197050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inIndexes=(const int32_t *)inBytes; 197150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 197250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(length>=0) { 197350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length-=headerSize; 197427f654740f2a26ad62a5c155af9199af9e69b889claireho if(length<(int32_t)sizeof(indexes)) { 197550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n", 197650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length); 197750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 197850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 197950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 198050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 198150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 198250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* read the first few indexes */ 198350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(i=0; i<=Normalizer2Impl::IX_MIN_MAYBE_YES; ++i) { 198450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[i]=udata_readInt32(ds, inIndexes[i]); 198550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 198650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 198750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* get the total length of the data */ 198850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho size=indexes[Normalizer2Impl::IX_TOTAL_SIZE]; 198950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 199050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(length>=0) { 199150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(length<size) { 199250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n", 199350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length); 199450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 199550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 199650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 199750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 199850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* copy the data for inaccessible bytes */ 199950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(inBytes!=outBytes) { 200050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_memcpy(outBytes, inBytes, size); 200150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 200250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 200350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho offset=0; 
200450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 200550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* swap the int32_t indexes[] */ 200650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]; 200750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode); 200850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho offset=nextOffset; 200950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 201050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* swap the UTrie2 */ 201150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]; 201250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); 201350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho offset=nextOffset; 201450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 201550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* swap the uint16_t extraData[] */ 201650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET+1]; 201750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); 201850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho offset=nextOffset; 201950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 202050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(offset==size); 202150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 202250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 202350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return headerSize+size; 202450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 202550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 202650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif // !UCONFIG_NO_NORMALIZATION 2027