16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2009-2013, International Business Machines 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: normalizer2impl.cpp 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2009nov22 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/normalizer2.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/udata.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "mutex.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "normalizer2impl.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "putilimp.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uset_imp.h" 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "utrie2.h" 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h" 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// ReorderingBuffer -------------------------------------------------------- *** 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) { 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=str.length(); 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start=str.getBuffer(destCapacity); 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(start==NULL) { 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // getBuffer() already did str.setToBogus() 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=start+length; 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity=str.getCapacity()-length; 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=start; 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(start==limit) { 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastCC=0; 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setIterator(); 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastCC=previousCC(); 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set reorderStart after the last code point with cc<=1 if there is one. 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(lastCC>1) { 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(previousCC()>1) {} 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=codePointLimit; 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const { 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=(int32_t)(limit-start); 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length==(int32_t)(otherLimit-otherStart) && 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0==u_memcmp(start, otherStart, length); 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) { 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(remainingCapacity<2 && !resize(2, errorCode)) { 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(lastCC<=cc || cc==0) { 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit[0]=U16_LEAD(c); 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit[1]=U16_TRAIL(c); 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit+=2; 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastCC=cc; 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cc<=1) { 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=limit; 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org insert(c, cc); 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity-=2; 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::append(const UChar *s, int32_t length, 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t leadCC, uint8_t trailCC, 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) { 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length==0) { 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(remainingCapacity<length && !resize(length, errorCode)) { 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity-=length; 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(lastCC<=leadCC || leadCC==0) { 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(trailCC<=1) { 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=limit+length; 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(leadCC<=1) { 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=limit+1; // Ok if not a code point boundary. 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *sLimit=s+length; 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { *limit++=*s++; } while(s!=sLimit); 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastCC=trailCC; 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i=0; 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(s, i, length, c); 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org insert(c, leadCC); // insert first code point 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(i<length) { 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(s, i, length, c); 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<length) { 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // s must be in NFD, otherwise we need to use getCC(). 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c)); 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org leadCC=trailCC; 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org append(c, leadCC, errorCode); 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) { 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t cpLength=U16_LENGTH(c); 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) { 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity-=cpLength; 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cpLength==1) { 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *limit++=(UChar)c; 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit[0]=U16_LEAD(c); 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit[1]=U16_TRAIL(c); 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit+=2; 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastCC=0; 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=limit; 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode) { 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(s==sLimit) { 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=(int32_t)(sLimit-s); 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(remainingCapacity<length && !resize(length, errorCode)) { 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_memcpy(limit, s, length); 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit+=length; 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity-=length; 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastCC=0; 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=limit; 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid ReorderingBuffer::remove() { 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=limit=start; 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity=str.getCapacity(); 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastCC=0; 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid ReorderingBuffer::removeSuffix(int32_t suffixLength) { 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(suffixLength<(limit-start)) { 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit-=suffixLength; 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity+=suffixLength; 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=start; 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity=str.getCapacity(); 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastCC=0; 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=limit; 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) { 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t reorderStartIndex=(int32_t)(reorderStart-start); 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=(int32_t)(limit-start); 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org str.releaseBuffer(length); 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newCapacity=length+appendLength; 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t doubleCapacity=2*str.getCapacity(); 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newCapacity<doubleCapacity) { 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newCapacity=doubleCapacity; 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newCapacity<256) { 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newCapacity=256; 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start=str.getBuffer(newCapacity); 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(start==NULL) { 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // getBuffer() already did str.setToBogus() 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=start+reorderStartIndex; 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=start+length; 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org remainingCapacity=str.getCapacity()-length; 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid ReorderingBuffer::skipPrevious() { 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org codePointLimit=codePointStart; 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c=*--codePointStart; 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) { 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --codePointStart; 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguint8_t ReorderingBuffer::previousCC() { 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org codePointLimit=codePointStart; 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(reorderStart>=codePointStart) { 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c=*--codePointStart; 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<Normalizer2Impl::MIN_CCC_LCCC_CP) { 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c2; 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) { 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --codePointStart; 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c2, c); 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c)); 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Inserts c somewhere before the last character. 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Requires 0<cc<lastCC which implies reorderStart<limit. 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid ReorderingBuffer::insert(UChar32 c, uint8_t cc) { 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(setIterator(), skipPrevious(); previousCC()>cc;) {} 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // insert c at codePointLimit, after the character with prevCC<=cc 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *q=limit; 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *r=limit+=U16_LENGTH(c); 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *--r=*--q; 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(codePointLimit!=q); 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org writeCodePoint(q, c); 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cc<=1) { 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reorderStart=r; 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Normalizer2Impl --------------------------------------------------------- *** 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct CanonIterData : public UMemory { 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CanonIterData(UErrorCode &errorCode); 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ~CanonIterData(); 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode); 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTrie2 *trie; 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVector canonStartSets; // contains UnicodeSet * 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::~Normalizer2Impl() { 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org udata_close(memory); 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_close(normTrie); 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fCanonIterData; 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool U_CALLCONV 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::isAcceptable(void *context, 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char * /* type */, const char * /*name*/, 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UDataInfo *pInfo) { 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->size>=20 && 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->isBigEndian==U_IS_BIG_ENDIAN && 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->charsetFamily==U_CHARSET_FAMILY && 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[1]==0x72 && 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[2]==0x6d && 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[3]==0x32 && 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->formatVersion[0]==2 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Normalizer2Impl *me=(Normalizer2Impl *)context; 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode); 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *inIndexes=(const int32_t *)inBytes; 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(indexesLength<=IX_MIN_MAYBE_YES) { 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP]; 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP]; 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minYesNo=inIndexes[IX_MIN_YES_NO]; 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]; 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minNoNo=inIndexes[IX_MIN_NO_NO]; 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limitNoNo=inIndexes[IX_LIMIT_NO_NO]; 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minMaybeYes=inIndexes[IX_MIN_MAYBE_YES]; 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inBytes+offset, nextOffset-offset, NULL, 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &errorCode); 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=nextOffset; 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org maybeYesCompositions=(const uint16_t *)(inBytes+offset); 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes); 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // smallFCD: new in formatVersion 2 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=nextOffset; 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org smallFCD=inBytes+offset; 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Build tccc180[]. 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300. 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t bits=0; 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(UChar c=0; c<0x180; bits>>=1) { 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((c&0xff)==0) { 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bits=smallFCD[c>>8]; // one byte per 0x100 code points 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bits&1) { 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(int i=0; i<0x20; ++i, ++c) { 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tccc180[c]=(uint8_t)getFCD16FromNormData(c); 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memset(tccc180+c, 0, 0x20); 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c+=0x20; 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguint8_t Normalizer2Impl::getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const { 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cpStart==(cpLimit-1)) { 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*cpStart; 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]); 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t prevNorm16=getNorm16(c); 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevNorm16<=minYesNo) { 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (uint8_t)(*getMapping(prevNorm16)>>8); // tccc from yesNo 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgenumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the start code point to the USet */ 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const USetAdder *sa=(const USetAdder *)context; 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, start); 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic uint32_t U_CALLCONV 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgsegmentStarterMapper(const void * /*context*/, uint32_t value) { 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return value&CANON_NOT_SEGMENT_STARTER; 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const { 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the start code point of each same-value range of each trie */ 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_enum(normTrie, NULL, enumPropertyStartsRange, sa); 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add Hangul LV syllables and LV+1 because of skippables */ 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) { 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, c); 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, c+1); 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const { 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the start code point of each same-value range of the canonical iterator data trie */ 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ensureCanonIterData(errorCode)) { 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // currently only used for the SEGMENT_STARTER property 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_enum(fCanonIterData->trie, segmentStarterMapper, enumPropertyStartsRange, sa); 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar * 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src, 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 minNeedDataCP, 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer *buffer, 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Make some effort to support NUL-terminated strings reasonably. 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Take the part of the fast quick check loop that does not look up 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // data and check the first part of the string. 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // After this prefix, determine the string length to simplify the rest 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // of the code. 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevSrc=src; 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c; 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while((c=*src++)<minNeedDataCP && c!=0) {} 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Back out the last character for full processing. 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Copy this prefix. 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(--src!=prevSrc) { 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buffer!=NULL) { 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer->appendZeroCC(prevSrc, src, errorCode); 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return src; 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Dual functionality: 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// buffer!=NULL: normalize 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// buffer==NULL: isNormalized/spanQuickCheckYes 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar * 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::decompose(const UChar *src, const UChar *limit, 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer *buffer, 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 minNoCP=minDecompNoCP; 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(limit==NULL) { 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode); 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return src; 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=u_strchr(src, 0); 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevSrc; 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c=0; 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16=0; 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // only for quick check 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevBoundary=src; 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t prevCC=0; 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // count code units below the minimum or with irrelevant data for the quick check 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(prevSrc=src; src!=limit;) { 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( (c=*src)<minNoCP || 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c)) 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++src; 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(!U16_IS_SURROGATE(c)) { 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c2; 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(c)) { 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c, c2); 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* trail surrogate */ { 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) { 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --src; 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c2, c); 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) { 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src+=U16_LENGTH(c); 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // copy these code units all at once 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src!=prevSrc) { 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buffer!=NULL) { 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!buffer->appendZeroCC(prevSrc, src, errorCode)) { 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=0; 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src==limit) { 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check one above-minimum, relevant code point. 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src+=U16_LENGTH(c); 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buffer!=NULL) { 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!decompose(c, norm16, *buffer, errorCode)) { 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isDecompYes(norm16)) { 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t cc=getCCFromYesOrMaybe(norm16); 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevCC<=cc || cc==0) { 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=cc; 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cc<=1) { 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return prevBoundary; // "no" or cc out of order 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return src; 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Decompose a short piece of text which is likely to contain characters that 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// fail the quick check loop and/or where the quick check loop's overhead 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// is unlikely to be amortized. 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Called by the compose() and makeFCD() implementations. 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit, 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer &buffer, 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(src<limit) { 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16; 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16); 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!decompose(c, norm16, buffer, errorCode)) { 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16, 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer &buffer, 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Only loops for 1:1 algorithmic mappings. 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // get the decomposition and the lead and trail cc's 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isDecompYes(norm16)) { 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c does not decompose 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode); 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isHangul(norm16)) { 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Hangul syllable: decompose algorithmically 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar jamos[3]; 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode); 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isDecompNoAlgorithmic(norm16)) { 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=mapAlgorithmic(c, norm16); 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org norm16=getNorm16(c); 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposes, get everything from the variable-length extra data 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mapping=getMapping(norm16); 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t firstUnit=*mapping; 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=firstUnit&MAPPING_LENGTH_MASK; 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t leadCC, trailCC; 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trailCC=(uint8_t)(firstUnit>>8); 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org leadCC=(uint8_t)(*(mapping-1)>>8); 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org leadCC=0; 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer.append((const UChar *)mapping+1, length, leadCC, trailCC, errorCode); 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar * 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const { 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *decomp=NULL; 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16; 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) { 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c does not decompose 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return decomp; 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isHangul(norm16)) { 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Hangul syllable: decompose algorithmically 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=Hangul::decompose(c, buffer); 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer; 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isDecompNoAlgorithmic(norm16)) { 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=mapAlgorithmic(c, norm16); 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org decomp=buffer; 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=0; 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_APPEND_UNSAFE(buffer, length, c); 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposes, get everything from the variable-length extra data 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mapping=getMapping(norm16); 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=*mapping&MAPPING_LENGTH_MASK; 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (const UChar *)mapping+1; 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// so that a raw mapping fits that consists of one unit ("rm0") 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// plus all but the first two code units of the normal mapping. 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK. 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar * 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const { 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We do not loop in this method because an algorithmic mapping itself 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // becomes a final result rather than having to be decomposed recursively. 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16; 5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) { 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c does not decompose 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isHangul(norm16)) { 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Hangul syllable: decompose algorithmically 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Hangul::getRawDecomposition(c, buffer); 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer; 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isDecompNoAlgorithmic(norm16)) { 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=mapAlgorithmic(c, norm16); 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=0; 6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_APPEND_UNSAFE(buffer, length, c); 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer; 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposes, get everything from the variable-length extra data 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mapping=getMapping(norm16); 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t firstUnit=*mapping; 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstUnit&MAPPING_HAS_RAW_MAPPING) { 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word. 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Bit 7=MAPPING_HAS_CCC_LCCC_WORD 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1; 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t rm0=*rawMapping; 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(rm0<=MAPPING_LENGTH_MASK) { 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=rm0; 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (const UChar *)rawMapping-rm0; 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Copy the normal mapping and replace its first two code units with rm0. 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer[0]=(UChar)rm0; 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2); 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=mLength-1; 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer; 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=mLength; 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (const UChar *)mapping+1; 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit, 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool doDecompose, 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString &safeMiddle, 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer &buffer, 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.copyReorderableSuffixTo(safeMiddle); 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(doDecompose) { 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org decompose(src, limit, &buffer, errorCode); 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Just merge the strings at the boundary. 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ForwardUTrie2StringIterator iter(normTrie, src, limit); 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t firstCC, prevCC, cc; 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org firstCC=prevCC=cc=getCC(iter.next16()); 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(cc!=0) { 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=cc; 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cc=getCC(iter.next16()); 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }; 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(limit==NULL) { // appendZeroCC() needs limit!=NULL 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=u_strchr(iter.codePointStart, 0); 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) { 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.appendZeroCC(iter.codePointStart, limit, errorCode); 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Note: hasDecompBoundary() could be implemented as aliases to 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// hasFCDBoundaryBefore() and hasFCDBoundaryAfter() 6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// at the cost of building the FCD trie for a decomposition normalizer. 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::hasDecompBoundary(UChar32 c, UBool before) const { 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<minDecompNoCP) { 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16=getNorm16(c); 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) { 6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(norm16>MIN_NORMAL_MAYBE_YES) { 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; // ccc!=0 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isDecompNoAlgorithmic(norm16)) { 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=mapAlgorithmic(c, norm16); 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposes, get everything from the variable-length extra data 6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mapping=getMapping(norm16); 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t firstUnit=*mapping; 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((firstUnit&MAPPING_LENGTH_MASK)==0) { 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!before) { 6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // decomp after-boundary: same as hasFCDBoundaryAfter(), 6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // fcd16<=1 || trailCC==0 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstUnit>0x1ff) { 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; // trailCC>1 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstUnit<=0xff) { 6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; // trailCC==0 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // if(trailCC==1) test leadCC==0, same as checking for before-boundary 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TRUE if leadCC==0 (hasFCDBoundaryBefore()) 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0; 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Finds the recomposition result for 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a forward-combining "lead" character, 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * specified with a pointer to its compositions list, 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and a backward-combining "trail" character. 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If the lead and trail characters combine, then this function returns 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the following "compositeAndFwd" value: 7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bits 21..1 composite character 7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bit 0 set if the composite is a forward-combining starter 7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * otherwise it returns -1. 7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The compositions list has (trail, compositeAndFwd) pair entries, 7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * encoded as either pairs or triples of 16-bit units. 7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The last entry has the high bit of its first unit set. 7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The list is sorted by ascending trail characters (there are no duplicates). 7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * A linear search is used. 7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * See normalizer2impl.h for a more detailed description 7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the compositions list format. 7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) { 7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t key1, firstUnit; 7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(trail<COMP_1_TRAIL_LIMIT) { 7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // trail character is 0..33FF 7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // result entry may have 2 or 3 units 7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org key1=(uint16_t)(trail<<1); 7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(key1>(firstUnit=*list)) { 7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list+=2+(firstUnit&COMP_1_TRIPLE); 7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { 7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstUnit&COMP_1_TRIPLE) { 7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ((int32_t)list[1]<<16)|list[2]; 7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return list[1]; 7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // trail character is 3400..10FFFF 7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // result entry has 3 units 7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org key1=(uint16_t)(COMP_1_TRAIL_LIMIT+ 7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (((trail>>COMP_1_TRAIL_SHIFT))& 7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ~COMP_1_TRIPLE)); 7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT); 7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t secondUnit; 7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(key1>(firstUnit=*list)) { 7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list+=2+(firstUnit&COMP_1_TRIPLE); 7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { 7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(key2>(secondUnit=list[1])) { 7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstUnit&COMP_1_LAST_TUPLE) { 7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list+=3; 7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { 7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2]; 7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param list some character's compositions list 7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param set recursively receives the composites from these compositions 7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const { 7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t firstUnit; 7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t compositeAndFwd; 7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org firstUnit=*list; 7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((firstUnit&COMP_1_TRIPLE)==0) { 7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compositeAndFwd=list[1]; 7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list+=2; 7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2]; 7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list+=3; 7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 composite=compositeAndFwd>>1; 7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((compositeAndFwd&1)!=0) { 7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org addComposites(getCompositionsListForComposite(getNorm16(composite)), set); 7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set.add(composite); 7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while((firstUnit&COMP_1_LAST_TUPLE)==0); 7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Recomposes the buffer text starting at recomposeStartIndex 7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (which is in NFD - decomposed and canonically ordered), 8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and truncates the buffer contents. 8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note that recomposition never lengthens the text: 8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Any character consists of either one or two code units; 8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a composition may contain at most one more code unit than the original starter, 8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * while the combining mark that is removed has at least one code unit. 8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, 8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool onlyContiguous) const { 8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *p=buffer.getStart()+recomposeStartIndex; 8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *limit=buffer.getLimit(); 8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(p==limit) { 8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *starter, *pRemove, *q, *r; 8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *compositionsList; 8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c, compositeAndFwd; 8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16; 8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t cc, prevCC; 8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool starterIsSupplementary; 8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Some of the following variables are not used until we have a forward-combining starter 8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and are only initialized now to avoid compiler warnings. 8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compositionsList=NULL; // used as indicator for whether we have a forward-combining starter 8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starter=NULL; 8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starterIsSupplementary=FALSE; 8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=0; 8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16); 8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cc=getCCFromYesOrMaybe(norm16); 8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( // this character combines backward and 8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMaybe(norm16) && 8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // we have seen a starter that combines forward and 8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compositionsList!=NULL && 8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the backward-combining character is not blocked 8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (prevCC<cc || prevCC==0) 8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isJamoVT(norm16)) { 8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c is a Jamo V/T, see if we can compose it with the previous character. 8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<Hangul::JAMO_T_BASE) { 8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. 8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE); 8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prev<Hangul::JAMO_L_COUNT) { 8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pRemove=p-1; 8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar syllable=(UChar) 8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (Hangul::HANGUL_BASE+ 8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))* 8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Hangul::JAMO_T_COUNT); 8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar t; 8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) { 8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++p; 8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org syllable+=t; // The next character was a Jamo T. 8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *starter=syllable; 8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // remove the Jamo V/T 8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org q=pRemove; 8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org r=p; 8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(r<limit) { 8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *q++=*r++; 8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=q; 8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=pRemove; 8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * No "else" for Jamo T: 8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Since the input is in NFD, there are no Hangul LV syllables that 8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a Jamo T could combine with. 8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * All Jamo Ts are combined above when handling Jamo Vs. 8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(p==limit) { 8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compositionsList=NULL; 8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((compositeAndFwd=combine(compositionsList, c))>=0) { 8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The starter and the combining mark (c) do combine. 8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 composite=compositeAndFwd>>1; 8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Replace the starter with the composite, remove the combining mark. 8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark 8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(starterIsSupplementary) { 8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_IS_SUPPLEMENTARY(composite)) { 8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // both are supplementary 8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starter[0]=U16_LEAD(composite); 8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starter[1]=U16_TRAIL(composite); 8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *starter=(UChar)composite; 8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The composite is shorter than the starter, 8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // move the intermediate characters forward one. 8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starterIsSupplementary=FALSE; 8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org q=starter+1; 8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org r=q+1; 8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(r<pRemove) { 8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *q++=*r++; 8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --pRemove; 8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(U_IS_SUPPLEMENTARY(composite)) { 9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The composite is longer than the starter, 9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // move the intermediate characters back one. 9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starterIsSupplementary=TRUE; 9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++starter; // temporarily increment for the loop boundary 9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org q=pRemove; 9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org r=++pRemove; 9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(starter<q) { 9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *--r=*--q; 9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *starter=U16_TRAIL(composite); 9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *--starter=U16_LEAD(composite); // undo the temporary increment 9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // both are on the BMP 9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *starter=(UChar)composite; 9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* remove the combining mark by moving the following text over it */ 9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pRemove<p) { 9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org q=pRemove; 9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org r=p; 9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(r<limit) { 9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *q++=*r++; 9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=q; 9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=pRemove; 9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Keep prevCC because we removed the combining mark. 9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(p==limit) { 9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Is the composite a starter that combines forward? 9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(compositeAndFwd&1) { 9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compositionsList= 9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org getCompositionsListForComposite(getNorm16(composite)); 9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compositionsList=NULL; 9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We combined; continue with looking for compositions. 9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // no combination this time 9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=cc; 9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(p==limit) { 9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If c did not combine, then check if it is a starter. 9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cc==0) { 9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Found a new starter. 9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) { 9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It may combine with something, prepare for it. 9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_IS_BMP(c)) { 9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starterIsSupplementary=FALSE; 9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starter=p-1; 9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starterIsSupplementary=TRUE; 9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starter=p-2; 9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(onlyContiguous) { 9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // FCC: no discontiguous compositions; any intervening character blocks. 9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compositionsList=NULL; 9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.setReorderingLimit(limit); 9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::composePair(UChar32 a, UChar32 b) const { 9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0 9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *list; 9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isInert(norm16)) { 9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SENTINEL; 9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(norm16<minYesNoMappingsOnly) { 9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isJamoL(norm16)) { 9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b-=Hangul::JAMO_V_BASE; 9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(0<=b && b<Hangul::JAMO_V_COUNT) { 9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (Hangul::HANGUL_BASE+ 9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)* 9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Hangul::JAMO_T_COUNT); 9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SENTINEL; 9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isHangul(norm16)) { 9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b-=Hangul::JAMO_T_BASE; 9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(Hangul::isHangulWithoutJamoT(a) && 0<b && b<Hangul::JAMO_T_COUNT) { // not b==0! 9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return a+b; 9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SENTINEL; 9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 'a' has a compositions list in extraData 9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list=extraData+norm16; 9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16>minYesNo) { // composite 'a' has both mapping & compositions list 10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list+= // mapping pointer 10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1+ // +1 to skip the first unit with the mapping lenth 10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*list&MAPPING_LENGTH_MASK); // + mapping length 10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) { 10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SENTINEL; 10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list=maybeYesCompositions+norm16-minMaybeYes; 10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(b<0 || 0x10ffff<b) { // combine(list, b) requires a valid code point b 10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SENTINEL; 10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return combine(list, b)>>1; 10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else 10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t compositeAndFwd=combine(list, b); 10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL; 10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Very similar to composeQuickCheck(): Make the same changes in both places if relevant. 10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// doCompose: normalize 10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// !doCompose: isNormalized (buffer must be empty and initialized) 10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool 10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::compose(const UChar *src, const UChar *limit, 10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool onlyContiguous, 10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool doCompose, 10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer &buffer, 10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * prevBoundary points to the last character before the current one 10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * that has a composition boundary before it with ccc==0 and quick check "yes". 10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Keeping track of prevBoundary saves us looking for a composition boundary 10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * when we find a "no" or "maybe". 10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * When we back out from prevSrc back to prevBoundary, 10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then we also remove those same characters (which had been simply copied 10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or canonically-order-inserted) from the ReorderingBuffer. 10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Therefore, at all times, the [prevBoundary..prevSrc[ source units 10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * must correspond 1:1 to destination units at the end of the destination buffer. 10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevBoundary=src; 10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 minNoMaybeCP=minCompNoMaybeCP; 10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(limit==NULL) { 10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, 10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org doCompose ? &buffer : NULL, 10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode); 10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevBoundary<src) { 10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set prevBoundary to the last character in the prefix. 10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src-1; 10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=u_strchr(src, 0); 10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevSrc; 10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c=0; 10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16=0; 10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // only for isNormalized 10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t prevCC=0; 10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // count code units below the minimum or with irrelevant data for the quick check 10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(prevSrc=src; src!=limit;) { 10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( (c=*src)<minNoMaybeCP || 10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c)) 10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++src; 10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(!U16_IS_SURROGATE(c)) { 10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c2; 10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(c)) { 10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { 10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c, c2); 10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* trail surrogate */ { 10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) { 10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --src; 10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c2, c); 10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isCompYesAndZeroCC(norm16=getNorm16(c))) { 10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src+=U16_LENGTH(c); 10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // copy these code units all at once 10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src!=prevSrc) { 10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(doCompose) { 10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!buffer.appendZeroCC(prevSrc, src, errorCode)) { 10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=0; 11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src==limit) { 11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set prevBoundary to the last character in the quick check loop. 11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src-1; 11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary && 11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_IS_LEAD(*(prevBoundary-1)) 11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --prevBoundary; 11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The start of the current character (c). 11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevSrc=src; 11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(src==limit) { 11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src+=U16_LENGTH(c); 11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. 11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward) 11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or has ccc!=0. 11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Check for Jamo V/T, then for regular characters. 11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * c is not a Hangul syllable or Jamo L because those have "yes" properties. 11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isJamoVT(norm16) && prevBoundary!=prevSrc) { 11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar prev=*(prevSrc-1); 11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool needToDecompose=FALSE; 11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<Hangul::JAMO_T_BASE) { 11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. 11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prev=(UChar)(prev-Hangul::JAMO_L_BASE); 11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prev<Hangul::JAMO_L_COUNT) { 11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!doCompose) { 11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar syllable=(UChar) 11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (Hangul::HANGUL_BASE+ 11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))* 11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Hangul::JAMO_T_COUNT); 11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar t; 11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src!=limit && (t=(UChar)(*src-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) { 11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++src; 11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org syllable+=t; // The next character was a Jamo T. 11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.setLastChar(syllable); 11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we see L+V+x where x!=T then we drop to the slow path, 11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // decompose and recompose. 11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This is to deal with NFKC finding normal L and V but a 11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // compatibility variant of a T. We need to either fully compose that 11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // combination here (which would complicate the code and may not work 11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // with strange custom data) or use the slow path -- or else our replacing 11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // two input characters (L+V) with one output character (LV syllable) 11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // would violate the invariant that [prevBoundary..prevSrc[ has the same 11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // length as what we appended to the buffer since prevBoundary. 11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org needToDecompose=TRUE; 11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(Hangul::isHangulWithoutJamoT(prev)) { 11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c is a Jamo Trailing consonant, 11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // compose with previous Hangul LV that does not contain a Jamo T. 11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!doCompose) { 11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.setLastChar((UChar)(prev+c-Hangul::JAMO_T_BASE)); 11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!needToDecompose) { 11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The Jamo V/T did not compose into a Hangul syllable. 11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(doCompose) { 11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!buffer.appendBMP((UChar)c, 0, errorCode)) { 11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=0; 11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Source buffer pointers: 11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * all done quick check current char not yet 11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * "yes" but (c) processed 11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * may combine 11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * forward 11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * [-------------[-------------[-------------[-------------[ 11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * | | | | | 11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * orig. src prevBoundary prevSrc src limit 11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Destination buffer pointers inside the ReorderingBuffer: 11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * all done might take not filled yet 11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * characters for 11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * reordering 11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * [-------------[-------------[-------------[ 11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * | | | | 12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * start reorderStart limit | 12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * +remainingCap.+ 12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16>=MIN_YES_YES_WITH_CC) { 12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t cc=(uint8_t)norm16; // cc!=0 12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( onlyContiguous && // FCC 12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (doCompose ? buffer.getLastCC() : prevCC)==0 && 12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary<prevSrc && 12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that 12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [prevBoundary..prevSrc[ (which is exactly one character under these conditions) 12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // passed the quick check "yes && ccc==0" test. 12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check whether the last character was a "yesYes" or a "yesNo". 12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If a "yesNo", then we get its trailing ccc from its 12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // mapping and check for canonical order. 12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // All other cases are ok. 12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc 12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fails FCD test, need to decompose and contiguously recompose. 12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!doCompose) { 12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(doCompose) { 12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!buffer.append(c, cc, errorCode)) { 12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(prevCC<=cc) { 12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=cc; 12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) { 12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Find appropriate boundaries around this character, 12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * decompose the source text from between the boundaries, 12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and recompose it. 12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * We may need to remove the last few characters from the ReorderingBuffer 12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to account for source text that was copied or appended 12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * but needs to take part in the recomposition. 12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Find the last composition boundary in [prevBoundary..src[. 12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It is either the decomposition of the current character (at prevSrc), 12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or prevBoundary. 12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(hasCompBoundaryBefore(c, norm16)) { 12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=prevSrc; 12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(doCompose) { 12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.removeSuffix((int32_t)(prevSrc-prevBoundary)); 12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Find the next composition boundary in [src..limit[ - 12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // modifies src to point to the next starter. 12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src=(UChar *)findNextCompBoundary(src, limit); 12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it. 12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t recomposeStartIndex=buffer.length(); 12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!decomposeShort(prevBoundary, src, buffer, errorCode)) { 12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org recompose(buffer, recomposeStartIndex, onlyContiguous); 12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!doCompose) { 12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!buffer.equals(prevBoundary, src)) { 12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.remove(); 12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=0; 12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Move to the next starter. We never need to look back before this point again. 12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Very similar to compose(): Make the same changes in both places if relevant. 12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// pQCResult==NULL: spanQuickCheckYes 12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES) 12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar * 12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit, 12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool onlyContiguous, 12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationCheckResult *pQCResult) const { 12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * prevBoundary points to the last character before the current one 12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * that has a composition boundary before it with ccc==0 and quick check "yes". 12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevBoundary=src; 12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 minNoMaybeCP=minCompNoMaybeCP; 12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(limit==NULL) { 12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode errorCode=U_ZERO_ERROR; 12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode); 12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevBoundary<src) { 12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set prevBoundary to the last character in the prefix. 12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src-1; 13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=u_strchr(src, 0); 13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevSrc; 13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c=0; 13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16=0; 13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t prevCC=0; 13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // count code units below the minimum or with irrelevant data for the quick check 13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(prevSrc=src;;) { 13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src==limit) { 13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return src; 13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( (c=*src)<minNoMaybeCP || 13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c)) 13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++src; 13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(!U16_IS_SURROGATE(c)) { 13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c2; 13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(c)) { 13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { 13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c, c2); 13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* trail surrogate */ { 13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) { 13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --src; 13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c2, c); 13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isCompYesAndZeroCC(norm16=getNorm16(c))) { 13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src+=U16_LENGTH(c); 13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src!=prevSrc) { 13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set prevBoundary to the last character in the quick check loop. 13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src-1; 13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary && 13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_IS_LEAD(*(prevBoundary-1)) 13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --prevBoundary; 13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=0; 13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The start of the current character (c). 13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevSrc=src; 13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src+=U16_LENGTH(c); 13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. 13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward) 13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or has ccc!=0. 13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isMaybeOrNonZeroCC(norm16)) { 13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t cc=getCCFromYesOrMaybe(norm16); 13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( onlyContiguous && // FCC 13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cc!=0 && 13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC==0 && 13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary<prevSrc && 13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // prevCC==0 && prevBoundary<prevSrc tell us that 13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [prevBoundary..prevSrc[ (which is exactly one character under these conditions) 13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // passed the quick check "yes && ccc==0" test. 13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check whether the last character was a "yesYes" or a "yesNo". 13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If a "yesNo", then we get its trailing ccc from its 13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // mapping and check for canonical order. 13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // All other cases are ok. 13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc 13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fails FCD test. 13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(prevCC<=cc || cc==0) { 13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCC=cc; 13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16<MIN_YES_YES_WITH_CC) { 13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pQCResult!=NULL) { 13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pQCResult=UNORM_MAYBE; 13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return prevBoundary; 13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pQCResult!=NULL) { 13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pQCResult=UNORM_NO; 13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return prevBoundary; 13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit, 13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool doCompose, 13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool onlyContiguous, 13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString &safeMiddle, 13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer &buffer, 13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!buffer.isEmpty()) { 14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *firstStarterInSrc=findNextCompBoundary(src, limit); 14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src!=firstStarterInSrc) { 14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(), 14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.getLimit()); 14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest); 14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString middle(lastStarterInDest, destSuffixLength); 14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.removeSuffix(destSuffixLength); 14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org safeMiddle=middle; 14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org middle.append(src, (int32_t)(firstStarterInSrc-src)); 14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *middleStart=middle.getBuffer(); 14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compose(middleStart, middleStart+middle.length(), onlyContiguous, 14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org TRUE, buffer, errorCode); 14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src=firstStarterInSrc; 14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(doCompose) { 14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(limit==NULL) { // appendZeroCC() needs limit!=NULL 14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=u_strchr(src, 0); 14246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.appendZeroCC(src, limit, errorCode); 14266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 14286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 14306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Does c have a composition boundary before it? 14316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * True if its decomposition begins with a character that has 14326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()). 14336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes 14346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (isCompYesAndZeroCC()) so we need not decompose. 14356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 14366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const { 14376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 14386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isCompYesAndZeroCC(norm16)) { 14396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 14406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isMaybeOrNonZeroCC(norm16)) { 14416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 14426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isDecompNoAlgorithmic(norm16)) { 14436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=mapAlgorithmic(c, norm16); 14446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org norm16=getNorm16(c); 14456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 14466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposes, get everything from the variable-length extra data 14476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mapping=getMapping(norm16); 14486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t firstUnit=*mapping; 14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((firstUnit&MAPPING_LENGTH_MASK)==0) { 14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD) && (*(mapping-1)&0xff00)) { 14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; // non-zero leadCC 14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i=1; // skip over the firstUnit 14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT_UNSAFE(mapping, i, c); 14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return isCompYesAndZeroCC(getNorm16(c)); 14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const { 14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16=getNorm16(c); 14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isInert(norm16)) { 14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(norm16<=minYesNo) { 14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Hangul: norm16==minYesNo 14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Hangul LVT has a boundary after it. 14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Hangul LV and non-inert yesYes characters combine forward. 14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return isHangul(norm16) && !Hangul::isHangulWithoutJamoT((UChar)c); 14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) { 14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isDecompNoAlgorithmic(norm16)) { 14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=mapAlgorithmic(c, norm16); 14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposes, get everything from the variable-length extra data. 14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If testInert, then c must be a yesNo character which has lccc=0, 14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // otherwise it could be a noNo. 14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mapping=getMapping(norm16); 14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t firstUnit=*mapping; 14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TRUE if 14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // not MAPPING_NO_COMP_BOUNDARY_AFTER 14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (which is set if 14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c is not deleted, and 14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // it and its decomposition do not combine forward, and it has a starter) 14886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and if FCC then trailCC<=1 14896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 14906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (firstUnit&MAPPING_NO_COMP_BOUNDARY_AFTER)==0 && 14916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (!onlyContiguous || firstUnit<=0x1ff); 14926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 14956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p) const { 14976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org BackwardUTrie2StringIterator iter(normTrie, start, p); 14986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16; 14996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 15006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org norm16=iter.previous16(); 15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(!hasCompBoundaryBefore(iter.codePoint, norm16)); 15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We could also test hasCompBoundaryAfter() and return iter.codePointLimit, 15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // but that's probably not worth the extra cost. 15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return iter.codePointStart; 15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit) const { 15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ForwardUTrie2StringIterator iter(normTrie, p, limit); 15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16; 15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org norm16=iter.next16(); 15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(!hasCompBoundaryBefore(iter.codePoint, norm16)); 15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return iter.codePointStart; 15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Note: normalizer2impl.cpp r30982 (2011-nov-27) 15176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// still had getFCDTrie() which built and cached an FCD trie. 15186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// That provided faster access to FCD data than getFCD16FromNormData() 15196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// but required synchronization and consumed some 10kB of heap memory 15206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// in any process that uses FCD (e.g., via collation). 15216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// tccc180[] and smallFCD[] are intended to help with any loss of performance, 15226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// at least for Latin & CJK. 15236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Gets the FCD value from the regular normalization data. 15256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const { 15266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Only loops for 1:1 algorithmic mappings. 15276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 15286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16=getNorm16(c); 15296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16<=minYesNo) { 15306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // no decomposition or Hangul syllable, all zeros 15316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 15326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(norm16>=MIN_NORMAL_MAYBE_YES) { 15336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // combining mark 15346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org norm16&=0xff; 15356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return norm16|(norm16<<8); 15366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(norm16>=minMaybeYes) { 15376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 15386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(isDecompNoAlgorithmic(norm16)) { 15396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=mapAlgorithmic(c, norm16); 15406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 15416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposes, get everything from the variable-length extra data 15426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mapping=getMapping(norm16); 15436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t firstUnit=*mapping; 15446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((firstUnit&MAPPING_LENGTH_MASK)==0) { 15456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A character that is deleted (maps to an empty string) must 15466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // get the worst-case lccc and tccc values because arbitrary 15476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // characters on both sides will become adjacent. 15486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x1ff; 15496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 15506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org norm16=firstUnit>>8; // tccc 15516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { 15526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org norm16|=*(mapping-1)&0xff00; // lccc 15536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return norm16; 15556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 15596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Dual functionality: 15616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// buffer!=NULL: normalize 15626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes 15636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar * 15646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::makeFCD(const UChar *src, const UChar *limit, 15656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer *buffer, 15666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 15676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. 15686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Similar to the prevBoundary in the compose() implementation. 15696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevBoundary=src; 15706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t prevFCD16=0; 15716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(limit==NULL) { 15726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src=copyLowPrefixFromNulTerminated(src, MIN_CCC_LCCC_CP, buffer, errorCode); 15736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 15746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return src; 15756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevBoundary<src) { 15776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 15786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We know that the previous character's lccc==0. 15796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetching the fcd16 value was deferred for this below-U+0300 code point. 15806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevFCD16=getFCD16(*(src-1)); 15816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevFCD16>1) { 15826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --prevBoundary; 15836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=u_strchr(src, 0); 15866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: In this function we use buffer->appendZeroCC() because we track 15896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the lead and trail combining classes here, rather than leaving it to 15906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the ReorderingBuffer. 15916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The exception is the call to decomposeShort() which uses the buffer 15926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in the normal way. 15936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *prevSrc; 15956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c=0; 15966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t fcd16=0; 15976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 15996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // count code units with lccc==0 16006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(prevSrc=src; src!=limit;) { 16016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((c=*src)<MIN_CCC_LCCC_CP) { 16026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevFCD16=~c; 16036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++src; 16046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(!singleLeadMightHaveNonZeroFCD16(c)) { 16056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevFCD16=0; 16066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++src; 16076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 16086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE(c)) { 16096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c2; 16106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(c)) { 16116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { 16126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c, c2); 16136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* trail surrogate */ { 16156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) { 16166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --src; 16176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c2, c); 16186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((fcd16=getFCD16FromNormData(c))<=0xff) { 16226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevFCD16=fcd16; 16236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src+=U16_LENGTH(c); 16246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 16256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 16266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // copy these code units all at once 16306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src!=prevSrc) { 16316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) { 16326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 16336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src==limit) { 16356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 16366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 16386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We know that the previous character's lccc==0. 16396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevFCD16<0) { 16406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetching the fcd16 value was deferred for this below-U+0300 code point. 16416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 prev=~prevFCD16; 16426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev); 16436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevFCD16>1) { 16446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --prevBoundary; 16456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 16476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *p=src-1; 16486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) { 16496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --p; 16506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Need to fetch the previous character's FCD value because 16516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // prevFCD16 was just for the trail surrogate code point. 16526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1])); 16536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Still known to have lccc==0 because its lead surrogate unit had lccc==0. 16546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevFCD16>1) { 16566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=p; 16576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The start of the current character (c). 16606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevSrc=src; 16616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(src==limit) { 16626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 16636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src+=U16_LENGTH(c); 16666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. 16676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check for proper order, and decompose locally if necessary. 16686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((prevFCD16&0xff)<=(fcd16>>8)) { 16696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // proper order: prev tccc <= current lccc 16706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((fcd16&0xff)<=1) { 16716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 16726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) { 16746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 16756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevFCD16=fcd16; 16776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 16786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(buffer==NULL) { 16796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return prevBoundary; // quick check "no" 16806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 16816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 16826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Back out the part of the source that we copied or appended 16836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * already but is now going to be decomposed. 16846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * prevSrc is set to after what was copied/appended. 16856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 16866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer->removeSuffix((int32_t)(prevSrc-prevBoundary)); 16876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 16886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Find the part of the source that needs to be decomposed, 16896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * up to the next safe boundary. 16906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 16916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src=findNextFCDBoundary(src, limit); 16926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 16936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The source text does not fulfill the conditions for FCD. 16946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Decompose and reorder a limited piece of the text. 16956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 16966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!decomposeShort(prevBoundary, src, *buffer, errorCode)) { 16976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 16986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevBoundary=src; 17006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevFCD16=0; 17016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return src; 17046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit, 17076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool doMakeFCD, 17086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString &safeMiddle, 17096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ReorderingBuffer &buffer, 17106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 17116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!buffer.isEmpty()) { 17126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit); 17136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(src!=firstBoundaryInSrc) { 17146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(), 17156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.getLimit()); 17166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest); 17176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString middle(lastBoundaryInDest, destSuffixLength); 17186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.removeSuffix(destSuffixLength); 17196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org safeMiddle=middle; 17206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org middle.append(src, (int32_t)(firstBoundaryInSrc-src)); 17216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *middleStart=middle.getBuffer(); 17226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode); 17236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 17246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 17256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src=firstBoundaryInSrc; 17276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(doMakeFCD) { 17306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org makeFCD(src, limit, &buffer, errorCode); 17316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 17326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(limit==NULL) { // appendZeroCC() needs limit!=NULL 17336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=u_strchr(src, 0); 17346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.appendZeroCC(src, limit, errorCode); 17366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const { 17406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(start<p && previousFCD16(start, p)>0xff) {} 17416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return p; 17426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const { 17456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(p<limit) { 17466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *codePointStart=p; 17476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(nextFCD16(p, limit)<=0xff) { 17486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return codePointStart; 17496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return p; 17526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// CanonicalIterator data -------------------------------------------------- *** 17556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCanonIterData::CanonIterData(UErrorCode &errorCode) : 17576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trie(utrie2_open(0, 0, &errorCode)), 17586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org canonStartSets(uprv_deleteUObject, NULL, errorCode) {} 17596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCanonIterData::~CanonIterData() { 17616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_close(trie); 17626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) { 17656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t canonValue=utrie2_get32(trie, decompLead); 17666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) { 17676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // origin is the first character whose decomposition starts with 17686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the character for which we are setting the value. 17696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_set32(trie, decompLead, canonValue|origin, &errorCode); 17706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 17716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // origin is not the first character, or it is U+0000. 17726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet *set; 17736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((canonValue&CANON_HAS_SET)==0) { 17746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set=new UnicodeSet; 17756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(set==NULL) { 17766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 17776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 17786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK); 17806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size(); 17816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_set32(trie, decompLead, canonValue, &errorCode); 17826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org canonStartSets.addElement(set, errorCode); 17836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(firstOrigin!=0) { 17846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set->add(firstOrigin); 17856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 17876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)]; 17886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set->add(origin); 17906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN 17946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. 17966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// context: the Normalizer2Impl 17976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV 17986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgenumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) { 17996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode errorCode = U_ZERO_ERROR; 18006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (value != 0) { 18016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Normalizer2Impl *impl = (Normalizer2Impl *)context; 18026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org impl->makeCanonIterDataFromNorm16( 18036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start, end, (uint16_t)value, *impl->fCanonIterData, errorCode); 18046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SUCCESS(errorCode); 18066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// UInitOnce instantiation function for CanonIterData 18116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV 18136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orginitCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) { 18146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(impl->fCanonIterData == NULL); 18156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org impl->fCanonIterData = new CanonIterData(errorCode); 18166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (impl->fCanonIterData == NULL) { 18176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 18186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(errorCode)) { 18206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_enum(impl->getNormTrie(), NULL, enumCIDRangeHandler, impl); 18216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode); 18226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(errorCode)) { 18246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete impl->fCanonIterData; 18256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org impl->fCanonIterData = NULL; 18266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END 18306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16, 18326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CanonIterData &newData, 18336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) const { 18346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16==0 || (minYesNo<=norm16 && norm16<minNoNo)) { 18356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Inert, or 2-way mapping (including Hangul syllable). 18366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We do not write a canonStartSet for any yesNo character. 18376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Composites from 2-way mappings are added at runtime from the 18386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // starter's compositions list, and the other characters in 18396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are 18406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // "maybe" characters. 18416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 18426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(UChar32 c=start; c<=end; ++c) { 18446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t oldValue=utrie2_get32(newData.trie, c); 18456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t newValue=oldValue; 18466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16>=minMaybeYes) { 18476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // not a segment starter if it occurs in a decomposition or has cc!=0 18486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newValue|=CANON_NOT_SEGMENT_STARTER; 18496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16<MIN_NORMAL_MAYBE_YES) { 18506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newValue|=CANON_HAS_COMPOSITIONS; 18516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(norm16<minYesNo) { 18536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newValue|=CANON_HAS_COMPOSITIONS; 18546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 18556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c has a one-way decomposition 18566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c2=c; 18576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16_2=norm16; 18586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) { 18596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c2=mapAlgorithmic(c2, norm16_2); 18606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org norm16_2=getNorm16(c2); 18616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(minYesNo<=norm16_2 && norm16_2<limitNoNo) { 18636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposes, get everything from the variable-length extra data 18646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mapping=getMapping(norm16_2); 18656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t firstUnit=*mapping; 18666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=firstUnit&MAPPING_LENGTH_MASK; 18676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { 18686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c==c2 && (*(mapping-1)&0xff)!=0) { 18696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newValue|=CANON_NOT_SEGMENT_STARTER; // original c has cc!=0 18706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Skip empty mappings (no characters in the decomposition). 18736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length!=0) { 18746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++mapping; // skip over the firstUnit 18756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // add c to first code point's start set 18766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i=0; 18776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT_UNSAFE(mapping, i, c2); 18786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newData.addToStartSet(c, c2, errorCode); 18796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a 18806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // one-way mapping. A 2-way mapping is possible here after 18816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // intermediate algorithmic mapping. 18826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16_2>=minNoNo) { 18836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(i<length) { 18846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT_UNSAFE(mapping, i, c2); 18856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t c2Value=utrie2_get32(newData.trie, c2); 18866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) { 18876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_set32(newData.trie, c2, c2Value|CANON_NOT_SEGMENT_STARTER, 18886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &errorCode); 18896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 18946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c decomposed to c2 algorithmically; c has cc==0 18956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newData.addToStartSet(c, c2, errorCode); 18966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newValue!=oldValue) { 18996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_set32(newData.trie, c, newValue, &errorCode); 19006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const { 19056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Logically const: Synchronized instantiation. 19066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this); 19076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode); 19086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SUCCESS(errorCode); 19096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer2Impl::getCanonValue(UChar32 c) const { 19126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (int32_t)utrie2_get32(fCanonIterData->trie, c); 19136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const { 19166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *(const UnicodeSet *)fCanonIterData->canonStartSets[n]; 19176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const { 19206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return getCanonValue(c)>=0; 19216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const { 19246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER; 19256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(canonValue==0) { 19266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 19276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set.clear(); 19296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t value=canonValue&CANON_VALUE_MASK; 19306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((canonValue&CANON_HAS_SET)!=0) { 19316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set.addAll(getCanonStartSet(value)); 19326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value!=0) { 19336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set.add(value); 19346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { 19366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t norm16=getNorm16(c); 19376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(norm16==JAMO_L) { 19386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 syllable= 19396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT); 19406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1); 19416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 19426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org addComposites(getCompositionsList(norm16), set); 19436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 19466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 19496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Normalizer2 data swapping ----------------------------------------------- *** 19516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_USE 19536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 19556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm2_swap(const UDataSwapper *ds, 19566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *inData, int32_t length, void *outData, 19576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 19586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UDataInfo *pInfo; 19596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t headerSize; 19606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *inBytes; 19626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *outBytes; 19636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *inIndexes; 19656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t indexes[Normalizer2Impl::IX_MIN_MAYBE_YES+1]; 19666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i, offset, nextOffset, size; 19686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* udata_swapDataHeader checks the arguments */ 19706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 19716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 19726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 19736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* check data format and format version */ 19766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo=(const UDataInfo *)((const char *)inData+4); 19776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!( 19786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ 19796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[1]==0x72 && 19806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[2]==0x6d && 19816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[3]==0x32 && 19826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (pInfo->formatVersion[0]==1 || pInfo->formatVersion[0]==2) 19836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org )) { 19846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n", 19856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[0], pInfo->dataFormat[1], 19866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->dataFormat[2], pInfo->dataFormat[3], 19876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pInfo->formatVersion[0]); 19886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_UNSUPPORTED_ERROR; 19896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 19906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inBytes=(const uint8_t *)inData+headerSize; 19936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org outBytes=(uint8_t *)outData+headerSize; 19946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inIndexes=(const int32_t *)inBytes; 19966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length>=0) { 19986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length-=headerSize; 19996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<(int32_t)sizeof(indexes)) { 20006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n", 20016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length); 20026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 20036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 20046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* read the first few indexes */ 20086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<=Normalizer2Impl::IX_MIN_MAYBE_YES; ++i) { 20096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexes[i]=udata_readInt32(ds, inIndexes[i]); 20106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the total length of the data */ 20136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org size=indexes[Normalizer2Impl::IX_TOTAL_SIZE]; 20146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length>=0) { 20166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<size) { 20176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n", 20186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length); 20196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 20206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 20216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* copy the data for inaccessible bytes */ 20246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(inBytes!=outBytes) { 20256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(outBytes, inBytes, size); 20266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 20296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* swap the int32_t indexes[] */ 20316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]; 20326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode); 20336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=nextOffset; 20346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* swap the UTrie2 */ 20366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]; 20376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); 20386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=nextOffset; 20396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* swap the uint16_t extraData[] */ 20416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET]; 20426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); 20436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=nextOffset; 20446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */ 20466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1]; 20476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=nextOffset; 20486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(offset==size); 20506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return headerSize+size; 20536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 20546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif // !UCONFIG_NO_NORMALIZATION 2056