/*
*******************************************************************************
*
*   Copyright (C) 2009-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  n2builder.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov25
*   created by: Markus W. Scherer
*
* Builds Normalizer2 data and writes a binary .nrm file.
* For the file format see source/common/normalizer2impl.h.
1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/ 1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h" 2127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "n2builder.h" 2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdio.h> 2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdlib.h> 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <string.h> 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_HAVE_STD_STRING 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <vector> 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/errorcode.h" 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/localpointer.h" 3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/putil.h" 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/udata.h" 3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uniset.h" 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h" 3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h" 3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "hash.h" 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h" 3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "toolutil.h" 3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unewdata.h" 4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "utrie2.h" 4127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uvectr32.h" 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* UDataInfo cf. udata.h */ 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UDataInfo dataInfo={ 4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho sizeof(UDataInfo), 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 0, 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_IS_BIG_ENDIAN, 5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_CHARSET_FAMILY, 5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_SIZEOF_UCHAR, 5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 0, 5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 0x4e, 0x72, 0x6d, 0x32 }, /* dataFormat="Nrm2" */ 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 1, 0, 0, 0 }, /* formatVersion */ 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 5, 2, 0, 0 } /* dataVersion (Unicode version) */ 6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass HangulIterator { 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho struct Range { 6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 start, limit; 6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16; 6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho HangulIterator() : rangeIndex(0) {} 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Range *nextRange() { 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(rangeIndex<LENGTHOF(ranges)) { 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ranges+rangeIndex++; 
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void reset() { rangeIndex=0; } 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate: 8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static const Range ranges[4]; 8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t rangeIndex; 8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 8550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst HangulIterator::Range HangulIterator::ranges[4]={ 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { Hangul::JAMO_L_BASE, Hangul::JAMO_L_BASE+Hangul::JAMO_L_COUNT, 1 }, 8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { Hangul::JAMO_V_BASE, Hangul::JAMO_V_BASE+Hangul::JAMO_V_COUNT, Normalizer2Impl::JAMO_VT }, 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // JAMO_T_BASE+1: not U+11A7 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { Hangul::JAMO_T_BASE+1, Hangul::JAMO_T_BASE+Hangul::JAMO_T_COUNT, Normalizer2Impl::JAMO_VT }, 9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { Hangul::HANGUL_BASE, Hangul::HANGUL_BASE+Hangul::HANGUL_COUNT, 0 }, // will become minYesNo 9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostruct CompositionPair { 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CompositionPair(UChar32 t, UChar32 c) : trail(t), composite(c) {} 9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 trail, composite; 9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostruct Norm { 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho enum MappingType { NONE, REMOVED, 
ROUND_TRIP, ONE_WAY }; 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool hasMapping() const { return mappingType>REMOVED; } 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Requires hasMapping() and well-formed mapping. 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void setMappingCP() { 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!mapping->isEmpty() && mapping->length()==U16_LENGTH(c=mapping->char32At(0))) { 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho mappingCP=c; 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho mappingCP=U_SENTINEL; 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 11327f654740f2a26ad62a5c155af9199af9e69b889claireho const CompositionPair *getCompositionPairs(int32_t &length) const { 11427f654740f2a26ad62a5c155af9199af9e69b889claireho if(compositions==NULL) { 11527f654740f2a26ad62a5c155af9199af9e69b889claireho length=0; 11627f654740f2a26ad62a5c155af9199af9e69b889claireho return NULL; 11727f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 11827f654740f2a26ad62a5c155af9199af9e69b889claireho length=compositions->size()/2; 11927f654740f2a26ad62a5c155af9199af9e69b889claireho return reinterpret_cast<const CompositionPair *>(compositions->getBuffer()); 12027f654740f2a26ad62a5c155af9199af9e69b889claireho } 12127f654740f2a26ad62a5c155af9199af9e69b889claireho } 12227f654740f2a26ad62a5c155af9199af9e69b889claireho 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString *mapping; 12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 mappingCP; // >=0 if mapping to 1 code point 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t mappingPhase; 
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MappingType mappingType; 12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 12827f654740f2a26ad62a5c155af9199af9e69b889claireho UVector32 *compositions; // (trail, composite) pairs 12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t cc; 13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool combinesBack; 13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool hasNoCompBoundaryAfter; 13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho enum OffsetType { 13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho OFFSET_NONE, OFFSET_MAYBE_YES, 13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho OFFSET_YES_YES, OFFSET_YES_NO, OFFSET_NO_NO, 13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho OFFSET_DELTA 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho enum { OFFSET_SHIFT=4, OFFSET_MASK=(1<<OFFSET_SHIFT)-1 }; 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t offset; 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass Normalizer2DBEnumerator { 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2DBEnumerator(Normalizer2DataBuilder &b) : builder(b) {} 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual ~Normalizer2DBEnumerator() {} 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) = 0; 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2DBEnumerator *ptr() { return this; } 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprotected: 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2DataBuilder &builder; 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) { 15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return ((Normalizer2DBEnumerator *)context)->rangeHandler(start, end, value); 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 16150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) : 16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL) { 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho memset(unicodeVersion, 0, sizeof(unicodeVersion)); 16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normTrie=utrie2_open(0, 0, &errorCode); 16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm)); 16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norms=allocNorm(); // unused Norm struct at index 0 16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho memset(indexes, 0, sizeof(indexes)); 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 17050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::~Normalizer2DataBuilder() { 17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_close(normTrie); 17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t normsLength=utm_countItems(normMem); 17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t i=1; i<normsLength; ++i) { 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete 
norms[i].mapping; 17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete norms[i].compositions; 17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utm_close(normMem); 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_close(norm16Trie); 17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid 18250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::setUnicodeVersion(const char *v) { 18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_versionFromString(unicodeVersion, v); 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 18650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNorm *Normalizer2DataBuilder::allocNorm() { 18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm *p=(Norm *)utm_alloc(normMem); 18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norms=(Norm *)utm_getStart(normMem); // in case it got reallocated 18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return p; 19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* get an existing Norm unit */ 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNorm *Normalizer2DataBuilder::getNorm(UChar32 c) { 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t i=utrie2_get32(normTrie, c); 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(i==0) { 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return norms+i; 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst Norm &Normalizer2DataBuilder::getNormRef(UChar32 c) const { 
20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return norms[utrie2_get32(normTrie, c)]; 20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * get or create a Norm unit; 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * get or create the intermediate trie entries for it as well 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNorm *Normalizer2DataBuilder::createNorm(UChar32 c) { 21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t i=utrie2_get32(normTrie, c); 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(i!=0) { 21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return norms+i; 21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* allocate Norm */ 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm *p=allocNorm(); 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuToolErrorCode errorCode("gennorm2/createNorm()"); 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_set32(normTrie, c, (uint32_t)(p-norms), errorCode); 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return p; 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNorm *Normalizer2DataBuilder::checkNormForMapping(Norm *p, UChar32 c) { 22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p!=NULL) { 22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->mappingType!=Norm::NONE) { 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( overrideHandling==OVERRIDE_NONE || 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (overrideHandling==OVERRIDE_PREVIOUS && p->mappingPhase==phase) 
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "error in gennorm2 phase %d: " 23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "not permitted to override mapping for U+%04lX from phase %d\n", 23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int)phase, (long)c, (int)p->mappingPhase); 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete p->mapping; 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->mapping=NULL; 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->mappingPhase=phase; 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return p; 24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setOverrideHandling(OverrideHandling oh) { 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho overrideHandling=oh; 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++phase; 24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setCC(UChar32 c, uint8_t cc) { 24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho createNorm(c)->cc=cc; 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouint8_t Normalizer2DataBuilder::getCC(UChar32 c) const { 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return getNormRef(c).cc; 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool isWellFormed(const UnicodeString &s) { 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode errorCode=U_ZERO_ERROR; 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_strToUTF8(NULL, 0, NULL, s.getBuffer(), s.length(), &errorCode); 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR; 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setOneWayMapping(UChar32 c, const UnicodeString &m) { 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!isWellFormed(m)) { 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "error in gennorm2 phase %d: " 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "illegal one-way mapping from U+%04lX to malformed string\n", 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int)phase, (long)c); 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm *p=checkNormForMapping(createNorm(c), c); 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->mapping=new UnicodeString(m); 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->mappingType=Norm::ONE_WAY; 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->setMappingCP(); 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString &m) { 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_IS_SURROGATE(c)) { 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "error 
in gennorm2 phase %d: " 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "illegal round-trip mapping from surrogate code point U+%04lX\n", 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int)phase, (long)c); 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!isWellFormed(m)) { 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "error in gennorm2 phase %d: " 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "illegal round-trip mapping from U+%04lX to malformed string\n", 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int)phase, (long)c); 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t numCP=u_countChar32(m.getBuffer(), m.length()); 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(numCP!=2) { 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "error in gennorm2 phase %d: " 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "illegal round-trip mapping from U+%04lX to %d!=2 code points\n", 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int)phase, (long)c, (int)numCP); 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm *p=checkNormForMapping(createNorm(c), c); 29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->mapping=new UnicodeString(m); 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->mappingType=Norm::ROUND_TRIP; 30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->mappingCP=U_SENTINEL; 30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 
30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 30450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::removeMapping(UChar32 c) { 30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm *p=checkNormForMapping(getNorm(c), c); 30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p!=NULL) { 30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->mappingType=Norm::REMOVED; 30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass CompositionBuilder : public Normalizer2DBEnumerator { 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CompositionBuilder(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b) {} 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder.addComposition(start, end, value); 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 32050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::addComposition(UChar32 start, UChar32 end, uint32_t value) { 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norms[value].mappingType==Norm::ROUND_TRIP) { 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(start!=end) { 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: same round-trip mapping for " 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "more than 1 code point U+%04lX..U+%04lX\n", 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)start, 
(long)end); 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norms[value].cc!=0) { 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX has a round-trip mapping and ccc!=0, " 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "not possible in Unicode normalization\n", 33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)start); 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // setRoundTripMapping() ensured that there are exactly two code points. 33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &m=*norms[value].mapping; 34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 lead=m.char32At(0); 34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 trail=m.char32At(m.length()-1); 34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(getCC(lead)!=0) { 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX's round-trip mapping's starter U+%04lX has ccc!=0, " 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "not possible in Unicode normalization\n", 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)start, (long)lead); 34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Flag for trailing character. 
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho createNorm(trail)->combinesBack=TRUE; 35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Insert (trail, composite) pair into compositions list for the lead character. 35327f654740f2a26ad62a5c155af9199af9e69b889claireho IcuToolErrorCode errorCode("gennorm2/addComposition()"); 35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm *leadNorm=createNorm(lead); 35527f654740f2a26ad62a5c155af9199af9e69b889claireho UVector32 *compositions=leadNorm->compositions; 35627f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t i; 35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(compositions==NULL) { 35827f654740f2a26ad62a5c155af9199af9e69b889claireho compositions=leadNorm->compositions=new UVector32(errorCode); 35927f654740f2a26ad62a5c155af9199af9e69b889claireho i=0; // "insert" the first pair at index 0 36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Insertion sort, and check for duplicate trail characters. 
36227f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t length; 36327f654740f2a26ad62a5c155af9199af9e69b889claireho const CompositionPair *pairs=leadNorm->getCompositionPairs(length); 36427f654740f2a26ad62a5c155af9199af9e69b889claireho for(i=0; i<length; ++i) { 36527f654740f2a26ad62a5c155af9199af9e69b889claireho if(trail==pairs[i].trail) { 36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: same round-trip mapping for " 36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "more than 1 code point (e.g., U+%04lX) to U+%04lX + U+%04lX\n", 36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)start, (long)lead, (long)trail); 37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 37227f654740f2a26ad62a5c155af9199af9e69b889claireho if(trail<pairs[i].trail) { 37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 37727f654740f2a26ad62a5c155af9199af9e69b889claireho compositions->insertElementAt(trail, 2*i, errorCode); 37827f654740f2a26ad62a5c155af9199af9e69b889claireho compositions->insertElementAt(start, 2*i+1, errorCode); 37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 38250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2DataBuilder::combinesWithCCBetween(const Norm &norm, 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t lowCC, uint8_t highCC) const { 38427f654740f2a26ad62a5c155af9199af9e69b889claireho if((highCC-lowCC)>=2) { 38527f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t length; 38627f654740f2a26ad62a5c155af9199af9e69b889claireho const CompositionPair 
*pairs=norm.getCompositionPairs(length); 38727f654740f2a26ad62a5c155af9199af9e69b889claireho for(int32_t i=0; i<length; ++i) { 38827f654740f2a26ad62a5c155af9199af9e69b889claireho uint8_t trailCC=getCC(pairs[i].trail); 38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(lowCC<trailCC && trailCC<highCC) { 39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 39750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUChar32 Normalizer2DataBuilder::combine(const Norm &norm, UChar32 trail) const { 39827f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t length; 39927f654740f2a26ad62a5c155af9199af9e69b889claireho const CompositionPair *pairs=norm.getCompositionPairs(length); 40027f654740f2a26ad62a5c155af9199af9e69b889claireho for(int32_t i=0; i<length; ++i) { 40127f654740f2a26ad62a5c155af9199af9e69b889claireho if(trail==pairs[i].trail) { 40227f654740f2a26ad62a5c155af9199af9e69b889claireho return pairs[i].composite; 40327f654740f2a26ad62a5c155af9199af9e69b889claireho } 40427f654740f2a26ad62a5c155af9199af9e69b889claireho if(trail<pairs[i].trail) { 40527f654740f2a26ad62a5c155af9199af9e69b889claireho break; 40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return U_SENTINEL; 40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass Decomposer : public Normalizer2DBEnumerator { 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Decomposer(Normalizer2DataBuilder &b) : 
Normalizer2DBEnumerator(b), didDecompose(FALSE) {} 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho didDecompose|=builder.decompose(start, end, value); 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool didDecompose; 41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) { 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norms[value].hasMapping()) { 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &m=*norms[value].mapping; 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString *decomposed=NULL; 42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *s=m.getBuffer(); 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=m.length(); 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t prev, i=0; 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(i<length) { 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prev=i; 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(s, i, length, c); 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(start<=c && c<=end) { 43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: U+%04lX maps to itself directly or indirectly\n", 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)c); 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Norm &cNorm=getNormRef(c); 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cNorm.hasMapping()) { 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norms[value].mappingType==Norm::ROUND_TRIP) { 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(prev==0) { 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cNorm.mappingType!=Norm::ROUND_TRIP) { 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX's round-trip mapping's starter " 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX one-way-decomposes, " 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "not possible in Unicode normalization\n", 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)start, (long)c); 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t myTrailCC=getCC(m.char32At(i)); 45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 cTrailChar=cNorm.mapping->char32At(cNorm.mapping->length()-1); 45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t cTrailCC=getCC(cTrailChar); 45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cTrailCC>myTrailCC) { 45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX's round-trip mapping's starter " 45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX decomposes and the " 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "inner/earlier tccc=%hu > outer/following tccc=%hu, " 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "not possible in Unicode normalization\n", 46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
(long)start, (long)c, 46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (short)cTrailCC, (short)myTrailCC); 46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX's round-trip mapping's non-starter " 47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX decomposes, " 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "not possible in Unicode normalization\n", 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)start, (long)c); 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(decomposed==NULL) { 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decomposed=new UnicodeString(m, 0, prev); 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decomposed->append(*cNorm.mapping); 48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(Hangul::isHangul(c)) { 48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar buffer[3]; 48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t hangulLength=Hangul::decompose(c, buffer); 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norms[value].mappingType==Norm::ROUND_TRIP && prev!=0) { 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX's round-trip mapping's non-starter " 48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX decomposes, " 
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "not possible in Unicode normalization\n", 48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)start, (long)c); 49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(decomposed==NULL) { 49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decomposed=new UnicodeString(m, 0, prev); 49450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decomposed->append(buffer, hangulLength); 49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(decomposed!=NULL) { 49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decomposed->append(m, prev, i-prev); 49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 49950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(decomposed!=NULL) { 50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete norms[value].mapping; 50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norms[value].mapping=decomposed; 50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not norms[value].setMappingCP(); because the original mapping 50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // is most likely to be encodable as a delta. 
50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 50650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 50850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 51150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass BuilderReorderingBuffer { 51250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 51350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho BuilderReorderingBuffer() : fLength(0), fLastStarterIndex(-1), fDidReorder(FALSE) {} 51450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void reset() { 51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLength=0; 51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLastStarterIndex=-1; 51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fDidReorder=FALSE; 51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length() const { return fLength; } 52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isEmpty() const { return fLength==0; } 52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t lastStarterIndex() const { return fLastStarterIndex; } 52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 charAt(int32_t i) const { return fArray[i]>>8; } 52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t ccAt(int32_t i) const { return (uint8_t)fArray[i]; } 52450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool didReorder() const { return fDidReorder; } 52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void append(UChar32 c, uint8_t cc) { 52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cc==0 || fLength==0 || ccAt(fLength-1)<=cc) { 52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(cc==0) { 52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLastStarterIndex=fLength; 52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
53050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fArray[fLength++]=(c<<8)|cc; 53150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Let this character bubble back to its canonical order. 53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i=fLength-1; 53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(i>fLastStarterIndex && ccAt(i)>cc) { 53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --i; 53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++i; // after the last starter or prevCC<=cc 53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Move this and the following characters forward one to make space. 54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t j=fLength; i<j; --j) { 54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fArray[j]=fArray[j-1]; 54250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fArray[i]=(c<<8)|cc; 54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++fLength; 54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fDidReorder=TRUE; 54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void toString(UnicodeString &dest) { 54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest.remove(); 54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t i=0; i<fLength; ++i) { 55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest.append(charAt(i)); 55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void setComposite(UChar32 composite, int32_t combMarkIndex) { 55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fArray[fLastStarterIndex]=composite<<8; 55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Remove the combining mark that 
contributed to the composite. 55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho --fLength; 55750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(combMarkIndex<fLength) { 55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fArray[combMarkIndex]=fArray[combMarkIndex+1]; 55950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++combMarkIndex; 56050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate: 56350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t fArray[Normalizer2Impl::MAPPING_LENGTH_MASK]; 56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t fLength; 56550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t fLastStarterIndex; 56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool fDidReorder; 56750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 56950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid 57050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) { 57150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString &m=*p->mapping; 57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=m.length(); 57350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) { 57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; // writeMapping() will complain about it and print the code point. 
57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 57650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *s=m.getBuffer(); 57750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i=0; 57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(i<length) { 58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(s, i, length, c); 58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.append(c, getCC(c)); 58250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(buffer.didReorder()) { 58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.toString(m); 58550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 58650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 58750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 58850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(BuilderReorderingBuffer &buffer) { 58950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(buffer.isEmpty()) { 59050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; // maps-to-empty string is no boundary of any kind 59150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t lastStarterIndex=buffer.lastStarterIndex(); 59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(lastStarterIndex<0) { 59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; // no starter 59550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 starter=buffer.charAt(lastStarterIndex); 59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( Hangul::isJamoL(starter) || 59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (Hangul::isJamoV(starter) && 59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 0<lastStarterIndex && Hangul::isJamoL(buffer.charAt(lastStarterIndex-1))) 60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) 
{ 60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // A Jamo leading consonant or an LV pair combines-forward if it is at the end, 60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // otherwise it is blocked. 60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return lastStarterIndex==buffer.length()-1; 60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 60550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // no Hangul in fully decomposed mapping 60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Norm *starterNorm=&getNormRef(starter); 60750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(starterNorm->compositions==NULL) { 60850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; // the last starter does not combine forward 60950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 61050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compose as far as possible, and see if further compositions are possible. 61150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t prevCC=0; 61250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t combMarkIndex=lastStarterIndex+1; combMarkIndex<buffer.length();) { 61350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t cc=buffer.ccAt(combMarkIndex); // !=0 because after last starter 61450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(combinesWithCCBetween(*starterNorm, prevCC, cc)) { 61550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 61650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 61750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( prevCC<cc && 61850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (starter=combine(*starterNorm, buffer.charAt(combMarkIndex)))>=0 61950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 62050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.setComposite(starter, combMarkIndex); 62150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho starterNorm=&getNormRef(starter); 62250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
if(starterNorm->compositions==NULL) { 62350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; // the composite does not combine further 62450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 62550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 62650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCC=cc; 62750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++combMarkIndex; 62850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 62950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 63050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TRUE if the final, forward-combining starter is at the end. 63150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return prevCC==0; 63250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 63350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 63450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Requires p->hasMapping(). 63550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeMapping(UChar32 c, const Norm *p, UnicodeString &dataString) { 63650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString &m=*p->mapping; 63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t length=m.length(); 63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) { 63950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 64150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "mapping for U+%04lX longer than maximum of %d\n", 64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)c, Normalizer2Impl::MAPPING_LENGTH_MASK); 64350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 64450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t leadCC, trailCC; 64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(length==0) { 64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho leadCC=trailCC=0; 
64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho leadCC=getCC(m.char32At(0)); 65050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho trailCC=getCC(m.char32At(length-1)); 65150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(c<Normalizer2Impl::MIN_CCC_LCCC_CP && (p->cc!=0 || leadCC!=0)) { 65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX below U+0300 has ccc!=0 or lccc!=0, not supported by ICU\n", 65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)c); 65750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t firstUnit=length|(trailCC<<8); 66050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t secondUnit=p->cc|(leadCC<<8); 66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(secondUnit!=0) { 66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstUnit|=Normalizer2Impl::MAPPING_HAS_CCC_LCCC_WORD; 66350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->compositions!=NULL) { 66550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstUnit|=Normalizer2Impl::MAPPING_PLUS_COMPOSITION_LIST; 66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->hasNoCompBoundaryAfter) { 66850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstUnit|=Normalizer2Impl::MAPPING_NO_COMP_BOUNDARY_AFTER; 66950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 67050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataString.append((UChar)firstUnit); 67150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(secondUnit!=0) { 67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
dataString.append((UChar)secondUnit); 67350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 67450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataString.append(m); 67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 67650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 67750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Requires p->compositions!=NULL. 67850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeCompositions(UChar32 c, const Norm *p, UnicodeString &dataString) { 67950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->cc!=0) { 68050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 68150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX combines-forward and has ccc!=0, not possible in Unicode normalization\n", 68350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)c); 68450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 68550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 68627f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t length; 68727f654740f2a26ad62a5c155af9199af9e69b889claireho const CompositionPair *pairs=p->getCompositionPairs(length); 68850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t i=0; i<length; ++i) { 68927f654740f2a26ad62a5c155af9199af9e69b889claireho const CompositionPair &pair=pairs[i]; 69050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 22 bits for the composite character and whether it combines forward. 69150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 compositeAndFwd=pair.composite<<1; 69250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(getNormRef(pair.composite).compositions!=NULL) { 69350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho compositeAndFwd|=1; // The composite character also combines-forward. 
69450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 69550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Encode most pairs in two units and some in three. 69650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t firstUnit, secondUnit, thirdUnit; 69750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pair.trail<Normalizer2Impl::COMP_1_TRAIL_LIMIT) { 69850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(compositeAndFwd<=0xffff) { 69950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstUnit=pair.trail<<1; 70050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho secondUnit=compositeAndFwd; 70150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho thirdUnit=-1; 70250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 70350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstUnit=(pair.trail<<1)|Normalizer2Impl::COMP_1_TRIPLE; 70450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho secondUnit=compositeAndFwd>>16; 70550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho thirdUnit=compositeAndFwd; 70650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 70750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 70850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstUnit=(Normalizer2Impl::COMP_1_TRAIL_LIMIT+ 70950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (pair.trail>>Normalizer2Impl::COMP_1_TRAIL_SHIFT))| 71050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2Impl::COMP_1_TRIPLE; 71150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho secondUnit=(pair.trail<<Normalizer2Impl::COMP_2_TRAIL_SHIFT)| 71250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (compositeAndFwd>>16); 71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho thirdUnit=compositeAndFwd; 71450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 71550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Set the high bit of the first unit if this is the last composition pair. 
71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(i==(length-1)) { 71750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho firstUnit|=Normalizer2Impl::COMP_1_LAST_TUPLE; 71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 71950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataString.append((UChar)firstUnit).append((UChar)secondUnit); 72050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(thirdUnit>=0) { 72150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataString.append((UChar)thirdUnit); 72250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 72450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 72550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 72650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass ExtraDataWriter : public Normalizer2DBEnumerator { 72750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ExtraDataWriter(Normalizer2DataBuilder &b) : 72950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2DBEnumerator(b), 73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho yesYesCompositions(1000, (UChar32)0xffff, 2), // 0=inert, 1=Jamo L, 2=start of compositions 73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho yesNoData(1000, (UChar32)0, 1) {} // 0=Hangul, 1=start of normal data 73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { 73350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(value!=0) { 73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(start!=end) { 73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: unexpected shared data for " 73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "multiple code points U+%04lX..U+%04lX\n", 73850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)start, (long)end); 73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
exit(U_INTERNAL_PROGRAM_ERROR); 74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder.writeExtraData(start, value, *this); 74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 74450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 74550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString maybeYesCompositions; 74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString yesYesCompositions; 74750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString yesNoData; 74850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString noNoMappings; 74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Hashtable previousNoNoMappings; // If constructed in runtime code, pass in UErrorCode. 75050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 75150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 75250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraDataWriter &writer) { 75350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm *p=norms+value; 75450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->combinesBack) { 75550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->hasMapping()) { 75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 75850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX combines-back and decomposes, not possible in Unicode normalization\n", 75950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)c); 76050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 76150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 76250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->compositions!=NULL) { 76350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->offset= 76450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
(writer.maybeYesCompositions.length()<<Norm::OFFSET_SHIFT)| 76550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm::OFFSET_MAYBE_YES; 76650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho writeCompositions(c, p, writer.maybeYesCompositions); 76750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 76850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(!p->hasMapping()) { 76950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->compositions!=NULL) { 77050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->offset= 77150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (writer.yesYesCompositions.length()<<Norm::OFFSET_SHIFT)| 77250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm::OFFSET_YES_YES; 77350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho writeCompositions(c, p, writer.yesYesCompositions); 77450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 77550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(p->mappingType==Norm::ROUND_TRIP) { 77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->offset= 77750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (writer.yesNoData.length()<<Norm::OFFSET_SHIFT)| 77850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm::OFFSET_YES_NO; 77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho writeMapping(c, p, writer.yesNoData); 78050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->compositions!=NULL) { 78150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho writeCompositions(c, p, writer.yesNoData); 78250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 78350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else /* one-way */ { 78450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->compositions!=NULL) { 78550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 78650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 78750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "U+%04lX combines-forward and has a one-way mapping, " 78850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "not possible in Unicode 
normalization\n", 78950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)c); 79050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 79150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 79250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->cc==0 && optimization!=OPTIMIZE_FAST) { 79350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try a compact, algorithmic encoding. 79450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Only for ccc=0, because we can't store additional information. 79550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->mappingCP>=0) { 79650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t delta=p->mappingCP-c; 79750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(-Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA) { 79850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->offset=(delta<<Norm::OFFSET_SHIFT)|Norm::OFFSET_DELTA; 79950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 80050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 80150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 80250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->offset==0) { 80350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t oldNoNoLength=writer.noNoMappings.length(); 80450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho writeMapping(c, p, writer.noNoMappings); 80550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString newMapping=writer.noNoMappings.tempSubString(oldNoNoLength); 80650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t previousOffset=writer.previousNoNoMappings.geti(newMapping); 80750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(previousOffset!=0) { 80850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Duplicate, remove the new units and point to the old ones. 
80950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho writer.noNoMappings.truncate(oldNoNoLength); 81050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->offset= 81150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((previousOffset-1)<<Norm::OFFSET_SHIFT)| 81250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm::OFFSET_NO_NO; 81350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Enter this new mapping into the hashtable, avoiding value 0 which is "not found". 81550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.puti()"); 81650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho writer.previousNoNoMappings.puti(newMapping, oldNoNoLength+1, errorCode); 81750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho p->offset= 81850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (oldNoNoLength<<Norm::OFFSET_SHIFT)| 81950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm::OFFSET_NO_NO; 82050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 82150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 82250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 82450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 82550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass Norm16Writer : public Normalizer2DBEnumerator { 82650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 82750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Norm16Writer(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b) {} 82850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) { 82950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho builder.writeNorm16(start, end, value); 83050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 83150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 83250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 83350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
83450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeNorm16(UChar32 start, UChar32 end, uint32_t value) { 83550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(value!=0) { 83650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Norm *p=norms+value; 83750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t offset=p->offset>>Norm::OFFSET_SHIFT; 83850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t norm16=0; 83950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isDecompNo=FALSE; 84050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isCompNoMaybe=FALSE; 84150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho switch(p->offset&Norm::OFFSET_MASK) { 84250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case Norm::OFFSET_NONE: 84350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // No mapping, no compositions list. 84450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(p->combinesBack) { 84550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=Normalizer2Impl::MIN_NORMAL_MAYBE_YES+p->cc; 84650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isDecompNo=(UBool)(p->cc!=0); 84750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isCompNoMaybe=TRUE; 84850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(p->cc!=0) { 84950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=Normalizer2Impl::MIN_YES_YES_WITH_CC-1+p->cc; 85050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isDecompNo=isCompNoMaybe=TRUE; 85150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 85250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 85350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case Norm::OFFSET_MAYBE_YES: 85450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]+offset; 85550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isCompNoMaybe=TRUE; 85650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 85750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case Norm::OFFSET_YES_YES: 
85850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=offset; 85950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 86050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case Norm::OFFSET_YES_NO: 86150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=indexes[Normalizer2Impl::IX_MIN_YES_NO]+offset; 86250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isDecompNo=TRUE; 86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 86450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case Norm::OFFSET_NO_NO: 86550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=indexes[Normalizer2Impl::IX_MIN_NO_NO]+offset; 86650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isDecompNo=isCompNoMaybe=TRUE; 86750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 86850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case Norm::OFFSET_DELTA: 86950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=getCenterNoNoDelta()+offset; 87050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isDecompNo=isCompNoMaybe=TRUE; 87150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 87250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho default: // Should not occur. 
87350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INTERNAL_PROGRAM_ERROR); 87450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 87550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuToolErrorCode errorCode("gennorm2/writeNorm16()"); 87650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_setRange32(norm16Trie, start, end, (uint32_t)norm16, TRUE, errorCode); 87750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isDecompNo && start<indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]) { 87850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=start; 87950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 88050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(isCompNoMaybe && start<indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]) { 88150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=start; 88250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 88350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 88450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 88550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 88650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setHangulData() { 88750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho HangulIterator hi; 88850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const HangulIterator::Range *range; 88950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check that none of the Hangul/Jamo code points have data. 
89050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((range=hi.nextRange())!=NULL) { 89150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(UChar32 c=range->start; c<range->limit; ++c) { 89250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(utrie2_get32(norm16Trie, c)!=0) { 89350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 89450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 89550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "illegal mapping/composition/ccc data for Hangul or Jamo U+%04lX\n", 89650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)c); 89750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INVALID_FORMAT_ERROR); 89850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Set data for algorithmic runtime handling. 90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuToolErrorCode errorCode("gennorm2/setHangulData()"); 90350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho hi.reset(); 90450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((range=hi.nextRange())!=NULL) { 90550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint16_t norm16=range->norm16; 90650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norm16==0) { 90750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16=(uint16_t)indexes[Normalizer2Impl::IX_MIN_YES_NO]; // Hangul LV/LVT encoded as minYesNo 90850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(range->start<indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]) { 90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=range->start; 91050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 91250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(range->start<indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]) { // Jamo V/T are maybeYes 
91350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=range->start; 91450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 91550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 91650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_setRange32(norm16Trie, range->start, range->limit-1, norm16, TRUE, errorCode); 91750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode.assertSuccess(); 91850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 91950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 92050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 92150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN 92250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 92350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV 92450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumRangeMaxValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) { 92550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t *pMaxValue=(uint32_t *)context; 92650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(value>*pMaxValue) { 92750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pMaxValue=value; 92850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 92950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 93050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 93150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 93250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END 93350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 93450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::processData() { 93550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuToolErrorCode errorCode("gennorm2/processData()"); 93650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm16Trie=utrie2_open(0, 0, errorCode); 93750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode.assertSuccess(); 93850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 93950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
utrie2_enum(normTrie, NULL, enumRangeHandler, CompositionBuilder(*this).ptr()); 94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Decomposer decomposer(*this); 94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 94350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decomposer.didDecompose=FALSE; 94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_enum(normTrie, NULL, enumRangeHandler, &decomposer); 94550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(decomposer.didDecompose); 94650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 94750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho BuilderReorderingBuffer buffer; 94850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t normsLength=utm_countItems(normMem); 94950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t i=1; i<normsLength; ++i) { 95050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(norms[i].hasMapping()) { 95150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.reset(); 95250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reorder(norms+i, buffer); 95350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norms[i].hasNoCompBoundaryAfter=hasNoCompBoundaryAfter(buffer); 95450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 95550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 95650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=0x110000; 95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=0x110000; 95950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 96050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ExtraDataWriter extraDataWriter(*this); 96150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_enum(normTrie, NULL, enumRangeHandler, &extraDataWriter); 96250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 96350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
extraData=extraDataWriter.maybeYesCompositions; 96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho extraData.append(extraDataWriter.yesYesCompositions). 96550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho append(extraDataWriter.yesNoData). 96650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho append(extraDataWriter.noNoMappings); 96750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pad to even length for 4-byte alignment of following data. 96850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(extraData.length()&1) { 96950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho extraData.append((UChar)0); 97050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 97150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 97250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_YES_NO]= 97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho extraDataWriter.yesYesCompositions.length(); 97450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_NO_NO]= 97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_YES_NO]+ 97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho extraDataWriter.yesNoData.length(); 97750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_LIMIT_NO_NO]= 97850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_NO_NO]+ 97950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho extraDataWriter.noNoMappings.length(); 98050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]= 98150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2Impl::MIN_NORMAL_MAYBE_YES- 98250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho extraDataWriter.maybeYesCompositions.length(); 98350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 98450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minNoNoDelta=getCenterNoNoDelta()-Normalizer2Impl::MAX_DELTA; 98550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
if(indexes[Normalizer2Impl::IX_LIMIT_NO_NO]>minNoNoDelta) { 98650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, 98750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "gennorm2 error: " 98850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "data structure overflow, too much mapping composition data\n"); 98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_BUFFER_OVERFLOW_ERROR); 99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 99250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_enum(normTrie, NULL, enumRangeHandler, Norm16Writer(*this).ptr()); 99350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 99450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho setHangulData(); 99550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 99650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look for the "worst" norm16 value of any supplementary code point 99750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // corresponding to a lead surrogate, and set it as that surrogate's value. 99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Enables quick check inner loops to look at only code units. 99950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We could be more sophisticated: 100150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We could collect a bit set for whether there are values in the different 100250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // norm16 ranges (yesNo, maybeYes, yesYesWithCC etc.) 100350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // and select the best value that only breaks the composition and/or decomposition 100450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // inner loops if necessary. 100550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // However, that seems like overkill for an optimization for supplementary characters. 
100650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(UChar lead=0xd800; lead<0xdc00; ++lead) { 100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t maxValue=utrie2_get32(norm16Trie, lead); 100850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_enumForLeadSurrogate(norm16Trie, lead, NULL, enumRangeMaxValue, &maxValue); 100950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( maxValue>=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO] && 101050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho maxValue>(uint32_t)indexes[Normalizer2Impl::IX_MIN_NO_NO] 101150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 101250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0. 101350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Otherwise it might end up at something like JAMO_VT which stays in 101450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the inner decomposition quick check loop. 101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho maxValue=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]-1; 101650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 101750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_set32ForLeadSurrogateCodeUnit(norm16Trie, lead, maxValue, errorCode); 101850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 101950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 102050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Adjust supplementary minimum code points to break quick check loops at their lead surrogates. 102150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For an empty data file, minCP=0x110000 turns into 0xdc00 (first trail surrogate) 102250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // which is harmless. 102350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // As a result, the minimum code points are always BMP code points. 
102450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCP=indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]; 102550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(minCP>=0x10000) { 102650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=U16_LEAD(minCP); 102750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 102850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho minCP=indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]; 102950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(minCP>=0x10000) { 103050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=U16_LEAD(minCP); 103150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 103250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 103350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 103450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeBinaryFile(const char *filename) { 103550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho processData(); 103650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 103750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()"); 103850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_freeze(norm16Trie, UTRIE2_16_VALUE_BITS, errorCode); 103950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode); 104050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(errorCode.get()!=U_BUFFER_OVERFLOW_ERROR) { 104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2 error: unable to freeze/serialize the normalization trie - %s\n", 104250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode.errorName()); 104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(errorCode.reset()); 104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 104550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode.reset(); 104650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]); 104750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode); 104850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode.assertSuccess(); 104950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 105050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t offset=(int32_t)sizeof(indexes); 105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]=offset; 105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho offset+=norm16TrieLength; 105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]=offset; 105450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t totalSize=offset+=extraData.length()*2; 105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t i=Normalizer2Impl::IX_RESERVED2_OFFSET; i<=Normalizer2Impl::IX_TOTAL_SIZE; ++i) { 105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho indexes[i]=totalSize; 105750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(beVerbose) { 106050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("size of normalization trie: %5ld bytes\n", (long)norm16TrieLength); 106150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("size of 16-bit extra data: %5ld uint16_t\n", (long)extraData.length()); 106250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("size of binary data file contents: %5ld bytes\n", (long)totalSize); 106350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("minDecompNoCodePoint: U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]); 106450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("minCompNoMaybeCodePoint: U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]); 106550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("minYesNo: 
0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_YES_NO]); 106650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("minNoNo: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_NO_NO]); 106750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("limitNoNo: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]); 106850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("minMaybeYes: 0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]); 106950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 107050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 107150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho memcpy(dataInfo.dataVersion, unicodeVersion, 4); 107250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNewDataMemory *pData= 107350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_create(NULL, NULL, filename, &dataInfo, 107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho haveCopyright ? U_COPYRIGHT_STRING : NULL, errorCode); 107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(errorCode.isFailure()) { 107650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2 error: unable to create the output file %s - %s\n", 107750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho filename, errorCode.errorName()); 107850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(errorCode.reset()); 107950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 108050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_writeBlock(pData, indexes, sizeof(indexes)); 108150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_writeBlock(pData, norm16TrieBytes.getAlias(), norm16TrieLength); 108250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho udata_writeUString(pData, extraData.getBuffer(), extraData.length()); 108350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 108450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t writtenSize=udata_finish(pData, errorCode); 108550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(errorCode.isFailure()) { 
108650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2: error %s writing the output file\n", errorCode.errorName()); 108750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(errorCode.reset()); 108850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 108950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(writtenSize!=totalSize) { 109050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "gennorm2 error: written size %ld != calculated size %ld\n", 109150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (long)writtenSize, (long)totalSize); 109250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exit(U_INTERNAL_PROGRAM_ERROR); 109350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 109550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 109650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 109750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 109850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif /* #if !UCONFIG_NO_NORMALIZATION */ 109950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 110050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Hey, Emacs, please set the following: 110250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 110350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Local Variables: 110450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * indent-tabs-mode: nil 110550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * End: 110650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 1107