/*
*******************************************************************************
*
*   Copyright (C) 2009-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  n2builder.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov25
*   created by: Markus W. Scherer
*
* Builds Normalizer2 data and writes a binary .nrm file.
* For the file format see source/common/normalizer2impl.h.
*/
1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h"
2127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "n2builder.h"
2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdio.h>
2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <stdlib.h>
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <string.h>
2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_HAVE_STD_STRING
2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include <vector>
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/errorcode.h"
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/localpointer.h"
3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/putil.h"
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/udata.h"
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uniset.h"
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h"
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h"
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "hash.h"
3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h"
3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "toolutil.h"
3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unewdata.h"
4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "utrie2.h"
4127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uvectr32.h"
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves as a single
 * primary expression in any context (for example after a unary operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* UDataInfo cf. udata.h */
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UDataInfo dataInfo={
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    sizeof(UDataInfo),
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    0,
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_IS_BIG_ENDIAN,
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_CHARSET_FAMILY,
5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_SIZEOF_UCHAR,
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    0,
5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    { 0x4e, 0x72, 0x6d, 0x32 }, /* dataFormat="Nrm2" */
5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    { 1, 0, 0, 0 },             /* formatVersion */
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    { 5, 2, 0, 0 }              /* dataVersion (Unicode version) */
6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN
6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass HangulIterator {
6550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    struct Range {
6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 start, limit;
6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t norm16;
6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    HangulIterator() : rangeIndex(0) {}
7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const Range *nextRange() {
7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(rangeIndex<LENGTHOF(ranges)) {
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return ranges+rangeIndex++;
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return NULL;
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    void reset() { rangeIndex=0; }
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate:
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    static const Range ranges[4];
8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t rangeIndex;
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
8550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst HangulIterator::Range HangulIterator::ranges[4]={
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    { Hangul::JAMO_L_BASE, Hangul::JAMO_L_BASE+Hangul::JAMO_L_COUNT, 1 },
8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    { Hangul::JAMO_V_BASE, Hangul::JAMO_V_BASE+Hangul::JAMO_V_COUNT, Normalizer2Impl::JAMO_VT },
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // JAMO_T_BASE+1: not U+11A7
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    { Hangul::JAMO_T_BASE+1, Hangul::JAMO_T_BASE+Hangul::JAMO_T_COUNT, Normalizer2Impl::JAMO_VT },
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    { Hangul::HANGUL_BASE, Hangul::HANGUL_BASE+Hangul::HANGUL_COUNT, 0 },  // will become minYesNo
9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostruct CompositionPair {
9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    CompositionPair(UChar32 t, UChar32 c) : trail(t), composite(c) {}
9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 trail, composite;
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostruct Norm {
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    enum MappingType { NONE, REMOVED, ROUND_TRIP, ONE_WAY };
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool hasMapping() const { return mappingType>REMOVED; }
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Requires hasMapping() and well-formed mapping.
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    void setMappingCP() {
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(!mapping->isEmpty() && mapping->length()==U16_LENGTH(c=mapping->char32At(0))) {
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            mappingCP=c;
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            mappingCP=U_SENTINEL;
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
11327f654740f2a26ad62a5c155af9199af9e69b889claireho    const CompositionPair *getCompositionPairs(int32_t &length) const {
11427f654740f2a26ad62a5c155af9199af9e69b889claireho        if(compositions==NULL) {
11527f654740f2a26ad62a5c155af9199af9e69b889claireho            length=0;
11627f654740f2a26ad62a5c155af9199af9e69b889claireho            return NULL;
11727f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
11827f654740f2a26ad62a5c155af9199af9e69b889claireho            length=compositions->size()/2;
11927f654740f2a26ad62a5c155af9199af9e69b889claireho            return reinterpret_cast<const CompositionPair *>(compositions->getBuffer());
12027f654740f2a26ad62a5c155af9199af9e69b889claireho        }
12127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
12227f654740f2a26ad62a5c155af9199af9e69b889claireho
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString *mapping;
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 mappingCP;  // >=0 if mapping to 1 code point
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t mappingPhase;
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    MappingType mappingType;
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
12827f654740f2a26ad62a5c155af9199af9e69b889claireho    UVector32 *compositions;  // (trail, composite) pairs
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t cc;
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool combinesBack;
13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool hasNoCompBoundaryAfter;
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    enum OffsetType {
13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        OFFSET_NONE, OFFSET_MAYBE_YES,
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        OFFSET_YES_YES, OFFSET_YES_NO, OFFSET_NO_NO,
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        OFFSET_DELTA
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    enum { OFFSET_SHIFT=4, OFFSET_MASK=(1<<OFFSET_SHIFT)-1 };
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t offset;
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass Normalizer2DBEnumerator {
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Normalizer2DBEnumerator(Normalizer2DataBuilder &b) : builder(b) {}
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual ~Normalizer2DBEnumerator() {}
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) = 0;
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Normalizer2DBEnumerator *ptr() { return this; }
14850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprotected:
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Normalizer2DataBuilder &builder;
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return ((Normalizer2DBEnumerator *)context)->rangeHandler(start, end, value);
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
16150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::Normalizer2DataBuilder(UErrorCode &errorCode) :
16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        phase(0), overrideHandling(OVERRIDE_PREVIOUS), optimization(OPTIMIZE_NORMAL) {
16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    memset(unicodeVersion, 0, sizeof(unicodeVersion));
16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normTrie=utrie2_open(0, 0, &errorCode);
16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normMem=utm_open("gennorm2 normalization structs", 10000, 0x110100, sizeof(Norm));
16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    norms=allocNorm();  // unused Norm struct at index 0
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    memset(indexes, 0, sizeof(indexes));
16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
17050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::~Normalizer2DataBuilder() {
17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_close(normTrie);
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t normsLength=utm_countItems(normMem);
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(int32_t i=1; i<normsLength; ++i) {
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete norms[i].mapping;
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete norms[i].compositions;
17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utm_close(normMem);
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_close(norm16Trie);
17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
18150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid
18250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::setUnicodeVersion(const char *v) {
18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_versionFromString(unicodeVersion, v);
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
18650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNorm *Normalizer2DataBuilder::allocNorm() {
18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Norm *p=(Norm *)utm_alloc(normMem);
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    norms=(Norm *)utm_getStart(normMem);  // in case it got reallocated
18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return p;
19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* get an existing Norm unit */
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNorm *Normalizer2DataBuilder::getNorm(UChar32 c) {
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t i=utrie2_get32(normTrie, c);
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(i==0) {
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return norms+i;
19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
20150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst Norm &Normalizer2DataBuilder::getNormRef(UChar32 c) const {
20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return norms[utrie2_get32(normTrie, c)];
20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * get or create a Norm unit;
20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * get or create the intermediate trie entries for it as well
20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
20950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNorm *Normalizer2DataBuilder::createNorm(UChar32 c) {
21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t i=utrie2_get32(normTrie, c);
21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(i!=0) {
21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return norms+i;
21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* allocate Norm */
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        Norm *p=allocNorm();
21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        IcuToolErrorCode errorCode("gennorm2/createNorm()");
21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_set32(normTrie, c, (uint32_t)(p-norms), errorCode);
21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return p;
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNorm *Normalizer2DataBuilder::checkNormForMapping(Norm *p, UChar32 c) {
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(p!=NULL) {
22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p->mappingType!=Norm::NONE) {
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( overrideHandling==OVERRIDE_NONE ||
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (overrideHandling==OVERRIDE_PREVIOUS && p->mappingPhase==phase)
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fprintf(stderr,
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        "error in gennorm2 phase %d: "
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        "not permitted to override mapping for U+%04lX from phase %d\n",
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (int)phase, (long)c, (int)p->mappingPhase);
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                exit(U_INVALID_FORMAT_ERROR);
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete p->mapping;
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            p->mapping=NULL;
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        p->mappingPhase=phase;
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return p;
24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
24250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setOverrideHandling(OverrideHandling oh) {
24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    overrideHandling=oh;
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ++phase;
24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
24750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setCC(UChar32 c, uint8_t cc) {
24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    createNorm(c)->cc=cc;
24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
25150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouint8_t Normalizer2DataBuilder::getCC(UChar32 c) const {
25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return getNormRef(c).cc;
25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool isWellFormed(const UnicodeString &s) {
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode errorCode=U_ZERO_ERROR;
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_strToUTF8(NULL, 0, NULL, s.getBuffer(), s.length(), &errorCode);
25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR;
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setOneWayMapping(UChar32 c, const UnicodeString &m) {
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!isWellFormed(m)) {
26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "error in gennorm2 phase %d: "
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "illegal one-way mapping from U+%04lX to malformed string\n",
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (int)phase, (long)c);
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_INVALID_FORMAT_ERROR);
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Norm *p=checkNormForMapping(createNorm(c), c);
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    p->mapping=new UnicodeString(m);
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    p->mappingType=Norm::ONE_WAY;
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    p->setMappingCP();
27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setRoundTripMapping(UChar32 c, const UnicodeString &m) {
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_IS_SURROGATE(c)) {
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "error in gennorm2 phase %d: "
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "illegal round-trip mapping from surrogate code point U+%04lX\n",
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (int)phase, (long)c);
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_INVALID_FORMAT_ERROR);
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!isWellFormed(m)) {
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "error in gennorm2 phase %d: "
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "illegal round-trip mapping from U+%04lX to malformed string\n",
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (int)phase, (long)c);
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_INVALID_FORMAT_ERROR);
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t numCP=u_countChar32(m.getBuffer(), m.length());
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(numCP!=2) {
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "error in gennorm2 phase %d: "
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "illegal round-trip mapping from U+%04lX to %d!=2 code points\n",
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (int)phase, (long)c, (int)numCP);
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_INVALID_FORMAT_ERROR);
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Norm *p=checkNormForMapping(createNorm(c), c);
29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    p->mapping=new UnicodeString(m);
30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    p->mappingType=Norm::ROUND_TRIP;
30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    p->mappingCP=U_SENTINEL;
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::removeMapping(UChar32 c) {
30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Norm *p=checkNormForMapping(getNorm(c), c);
30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(p!=NULL) {
30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        p->mappingType=Norm::REMOVED;
30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
31150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass CompositionBuilder : public Normalizer2DBEnumerator {
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    CompositionBuilder(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b) {}
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        builder.addComposition(start, end, value);
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::addComposition(UChar32 start, UChar32 end, uint32_t value) {
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(norms[value].mappingType==Norm::ROUND_TRIP) {
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(start!=end) {
32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fprintf(stderr,
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "gennorm2 error: same round-trip mapping for "
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "more than 1 code point U+%04lX..U+%04lX\n",
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (long)start, (long)end);
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            exit(U_INVALID_FORMAT_ERROR);
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(norms[value].cc!=0) {
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fprintf(stderr,
33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "gennorm2 error: "
33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "U+%04lX has a round-trip mapping and ccc!=0, "
33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "not possible in Unicode normalization\n",
33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (long)start);
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            exit(U_INVALID_FORMAT_ERROR);
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // setRoundTripMapping() ensured that there are exactly two code points.
33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UnicodeString &m=*norms[value].mapping;
34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 lead=m.char32At(0);
34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 trail=m.char32At(m.length()-1);
34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(getCC(lead)!=0) {
34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fprintf(stderr,
34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "gennorm2 error: "
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "U+%04lX's round-trip mapping's starter U+%04lX has ccc!=0, "
34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "not possible in Unicode normalization\n",
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (long)start, (long)lead);
34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            exit(U_INVALID_FORMAT_ERROR);
34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Flag for trailing character.
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        createNorm(trail)->combinesBack=TRUE;
35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Insert (trail, composite) pair into compositions list for the lead character.
35327f654740f2a26ad62a5c155af9199af9e69b889claireho        IcuToolErrorCode errorCode("gennorm2/addComposition()");
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        Norm *leadNorm=createNorm(lead);
35527f654740f2a26ad62a5c155af9199af9e69b889claireho        UVector32 *compositions=leadNorm->compositions;
35627f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t i;
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(compositions==NULL) {
35827f654740f2a26ad62a5c155af9199af9e69b889claireho            compositions=leadNorm->compositions=new UVector32(errorCode);
35927f654740f2a26ad62a5c155af9199af9e69b889claireho            i=0;  // "insert" the first pair at index 0
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Insertion sort, and check for duplicate trail characters.
36227f654740f2a26ad62a5c155af9199af9e69b889claireho            int32_t length;
36327f654740f2a26ad62a5c155af9199af9e69b889claireho            const CompositionPair *pairs=leadNorm->getCompositionPairs(length);
36427f654740f2a26ad62a5c155af9199af9e69b889claireho            for(i=0; i<length; ++i) {
36527f654740f2a26ad62a5c155af9199af9e69b889claireho                if(trail==pairs[i].trail) {
36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fprintf(stderr,
36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            "gennorm2 error: same round-trip mapping for "
36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            "more than 1 code point (e.g., U+%04lX) to U+%04lX + U+%04lX\n",
36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            (long)start, (long)lead, (long)trail);
37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    exit(U_INVALID_FORMAT_ERROR);
37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
37227f654740f2a26ad62a5c155af9199af9e69b889claireho                if(trail<pairs[i].trail) {
37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
37727f654740f2a26ad62a5c155af9199af9e69b889claireho        compositions->insertElementAt(trail, 2*i, errorCode);
37827f654740f2a26ad62a5c155af9199af9e69b889claireho        compositions->insertElementAt(start, 2*i+1, errorCode);
37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
38250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2DataBuilder::combinesWithCCBetween(const Norm &norm,
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                    uint8_t lowCC, uint8_t highCC) const {
38427f654740f2a26ad62a5c155af9199af9e69b889claireho    if((highCC-lowCC)>=2) {
38527f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t length;
38627f654740f2a26ad62a5c155af9199af9e69b889claireho        const CompositionPair *pairs=norm.getCompositionPairs(length);
38727f654740f2a26ad62a5c155af9199af9e69b889claireho        for(int32_t i=0; i<length; ++i) {
38827f654740f2a26ad62a5c155af9199af9e69b889claireho            uint8_t trailCC=getCC(pairs[i].trail);
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(lowCC<trailCC && trailCC<highCC) {
39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return TRUE;
39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return FALSE;
39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
39750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUChar32 Normalizer2DataBuilder::combine(const Norm &norm, UChar32 trail) const {
39827f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t length;
39927f654740f2a26ad62a5c155af9199af9e69b889claireho    const CompositionPair *pairs=norm.getCompositionPairs(length);
40027f654740f2a26ad62a5c155af9199af9e69b889claireho    for(int32_t i=0; i<length; ++i) {
40127f654740f2a26ad62a5c155af9199af9e69b889claireho        if(trail==pairs[i].trail) {
40227f654740f2a26ad62a5c155af9199af9e69b889claireho            return pairs[i].composite;
40327f654740f2a26ad62a5c155af9199af9e69b889claireho        }
40427f654740f2a26ad62a5c155af9199af9e69b889claireho        if(trail<pairs[i].trail) {
40527f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return U_SENTINEL;
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass Decomposer : public Normalizer2DBEnumerator {
41250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Decomposer(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b), didDecompose(FALSE) {}
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        didDecompose|=builder.decompose(start, end, value);
41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool didDecompose;
41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) {
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(norms[value].hasMapping()) {
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UnicodeString &m=*norms[value].mapping;
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString *decomposed=NULL;
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *s=m.getBuffer();
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t length=m.length();
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t prev, i=0;
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(i<length) {
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prev=i;
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(s, i, length, c);
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(start<=c && c<=end) {
43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fprintf(stderr,
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        "gennorm2 error: U+%04lX maps to itself directly or indirectly\n",
43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (long)c);
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                exit(U_INVALID_FORMAT_ERROR);
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const Norm &cNorm=getNormRef(c);
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(cNorm.hasMapping()) {
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(norms[value].mappingType==Norm::ROUND_TRIP) {
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prev==0) {
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if(cNorm.mappingType!=Norm::ROUND_TRIP) {
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fprintf(stderr,
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "gennorm2 error: "
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "U+%04lX's round-trip mapping's starter "
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "U+%04lX one-way-decomposes, "
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "not possible in Unicode normalization\n",
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    (long)start, (long)c);
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            exit(U_INVALID_FORMAT_ERROR);
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        uint8_t myTrailCC=getCC(m.char32At(i));
45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar32 cTrailChar=cNorm.mapping->char32At(cNorm.mapping->length()-1);
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        uint8_t cTrailCC=getCC(cTrailChar);
45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if(cTrailCC>myTrailCC) {
45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fprintf(stderr,
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "gennorm2 error: "
45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "U+%04lX's round-trip mapping's starter "
45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "U+%04lX decomposes and the "
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "inner/earlier tccc=%hu > outer/following tccc=%hu, "
46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    "not possible in Unicode normalization\n",
46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    (long)start, (long)c,
46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    (short)cTrailCC, (short)myTrailCC);
46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            exit(U_INVALID_FORMAT_ERROR);
46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fprintf(stderr,
46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                "gennorm2 error: "
46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                "U+%04lX's round-trip mapping's non-starter "
47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                "U+%04lX decomposes, "
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                "not possible in Unicode normalization\n",
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                (long)start, (long)c);
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        exit(U_INVALID_FORMAT_ERROR);
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(decomposed==NULL) {
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    decomposed=new UnicodeString(m, 0, prev);
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                decomposed->append(*cNorm.mapping);
48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(Hangul::isHangul(c)) {
48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar buffer[3];
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t hangulLength=Hangul::decompose(c, buffer);
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(norms[value].mappingType==Norm::ROUND_TRIP && prev!=0) {
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fprintf(stderr,
48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            "gennorm2 error: "
48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            "U+%04lX's round-trip mapping's non-starter "
48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            "U+%04lX decomposes, "
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            "not possible in Unicode normalization\n",
48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            (long)start, (long)c);
49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    exit(U_INVALID_FORMAT_ERROR);
49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(decomposed==NULL) {
49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    decomposed=new UnicodeString(m, 0, prev);
49450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                decomposed->append(buffer, hangulLength);
49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(decomposed!=NULL) {
49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                decomposed->append(m, prev, i-prev);
49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
49950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(decomposed!=NULL) {
50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delete norms[value].mapping;
50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norms[value].mapping=decomposed;
50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Not  norms[value].setMappingCP();  because the original mapping
50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // is most likely to be encodable as a delta.
50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
50650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
50850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return FALSE;
50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
51150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass BuilderReorderingBuffer {
51250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
51350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    BuilderReorderingBuffer() : fLength(0), fLastStarterIndex(-1), fDidReorder(FALSE) {}
51450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    void reset() {
51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLength=0;
51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLastStarterIndex=-1;
51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fDidReorder=FALSE;
51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length() const { return fLength; }
52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool isEmpty() const { return fLength==0; }
52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t lastStarterIndex() const { return fLastStarterIndex; }
52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 charAt(int32_t i) const { return fArray[i]>>8; }
52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t ccAt(int32_t i) const { return (uint8_t)fArray[i]; }
52450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool didReorder() const { return fDidReorder; }
52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    void append(UChar32 c, uint8_t cc) {
52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(cc==0 || fLength==0 || ccAt(fLength-1)<=cc) {
52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(cc==0) {
52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fLastStarterIndex=fLength;
52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
53050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fArray[fLength++]=(c<<8)|cc;
53150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return;
53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Let this character bubble back to its canonical order.
53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t i=fLength-1;
53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(i>fLastStarterIndex && ccAt(i)>cc) {
53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            --i;
53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ++i;  // after the last starter or prevCC<=cc
53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Move this and the following characters forward one to make space.
54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(int32_t j=fLength; i<j; --j) {
54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fArray[j]=fArray[j-1];
54250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fArray[i]=(c<<8)|cc;
54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ++fLength;
54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fDidReorder=TRUE;
54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    void toString(UnicodeString &dest) {
54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dest.remove();
54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(int32_t i=0; i<fLength; ++i) {
55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            dest.append(charAt(i));
55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    void setComposite(UChar32 composite, int32_t combMarkIndex) {
55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fArray[fLastStarterIndex]=composite<<8;
55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Remove the combining mark that contributed to the composite.
55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        --fLength;
55750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(combMarkIndex<fLength) {
55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fArray[combMarkIndex]=fArray[combMarkIndex+1];
55950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++combMarkIndex;
56050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
56150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
56250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate:
56350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t fArray[Normalizer2Impl::MAPPING_LENGTH_MASK];
56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t fLength;
56550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t fLastStarterIndex;
56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool fDidReorder;
56750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
56950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid
57050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) {
57150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString &m=*p->mapping;
57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=m.length();
57350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) {
57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;  // writeMapping() will complain about it and print the code point.
57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
57650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *s=m.getBuffer();
57750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i=0;
57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c;
57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(i<length) {
58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U16_NEXT(s, i, length, c);
58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        buffer.append(c, getCC(c));
58250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(buffer.didReorder()) {
58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        buffer.toString(m);
58550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
58650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
58750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
58850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(BuilderReorderingBuffer &buffer) {
58950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(buffer.isEmpty()) {
59050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;  // maps-to-empty string is no boundary of any kind
59150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t lastStarterIndex=buffer.lastStarterIndex();
59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(lastStarterIndex<0) {
59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;  // no starter
59550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 starter=buffer.charAt(lastStarterIndex);
59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( Hangul::isJamoL(starter) ||
59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (Hangul::isJamoV(starter) &&
59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         0<lastStarterIndex && Hangul::isJamoL(buffer.charAt(lastStarterIndex-1)))
60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // A Jamo leading consonant or an LV pair combines-forward if it is at the end,
60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // otherwise it is blocked.
60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return lastStarterIndex==buffer.length()-1;
60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
60550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // no Hangul in fully decomposed mapping
60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const Norm *starterNorm=&getNormRef(starter);
60750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(starterNorm->compositions==NULL) {
60850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;  // the last starter does not combine forward
60950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
61050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Compose as far as possible, and see if further compositions are possible.
61150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t prevCC=0;
61250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(int32_t combMarkIndex=lastStarterIndex+1; combMarkIndex<buffer.length();) {
61350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint8_t cc=buffer.ccAt(combMarkIndex);  // !=0 because after last starter
61450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(combinesWithCCBetween(*starterNorm, prevCC, cc)) {
61550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
61650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
61750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if( prevCC<cc &&
61850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            (starter=combine(*starterNorm, buffer.charAt(combMarkIndex)))>=0
61950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ) {
62050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer.setComposite(starter, combMarkIndex);
62150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            starterNorm=&getNormRef(starter);
62250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(starterNorm->compositions==NULL) {
62350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;  // the composite does not combine further
62450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
62550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
62650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCC=cc;
62750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++combMarkIndex;
62850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
62950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
63050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // TRUE if the final, forward-combining starter is at the end.
63150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return prevCC==0;
63250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
63350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
63450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Requires p->hasMapping().
63550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeMapping(UChar32 c, const Norm *p, UnicodeString &dataString) {
63650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString &m=*p->mapping;
63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=m.length();
63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) {
63950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "gennorm2 error: "
64150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "mapping for U+%04lX longer than maximum of %d\n",
64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (long)c, Normalizer2Impl::MAPPING_LENGTH_MASK);
64350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_INVALID_FORMAT_ERROR);
64450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t leadCC, trailCC;
64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length==0) {
64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        leadCC=trailCC=0;
64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        leadCC=getCC(m.char32At(0));
65050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        trailCC=getCC(m.char32At(length-1));
65150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(c<Normalizer2Impl::MIN_CCC_LCCC_CP && (p->cc!=0 || leadCC!=0)) {
65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "gennorm2 error: "
65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "U+%04lX below U+0300 has ccc!=0 or lccc!=0, not supported by ICU\n",
65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (long)c);
65750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_INVALID_FORMAT_ERROR);
65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t firstUnit=length|(trailCC<<8);
66050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t secondUnit=p->cc|(leadCC<<8);
66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(secondUnit!=0) {
66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        firstUnit|=Normalizer2Impl::MAPPING_HAS_CCC_LCCC_WORD;
66350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(p->compositions!=NULL) {
66550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        firstUnit|=Normalizer2Impl::MAPPING_PLUS_COMPOSITION_LIST;
66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(p->hasNoCompBoundaryAfter) {
66850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        firstUnit|=Normalizer2Impl::MAPPING_NO_COMP_BOUNDARY_AFTER;
66950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
67050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    dataString.append((UChar)firstUnit);
67150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(secondUnit!=0) {
67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataString.append((UChar)secondUnit);
67350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
67450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    dataString.append(m);
67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
67650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
67750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Requires p->compositions!=NULL.
67850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeCompositions(UChar32 c, const Norm *p, UnicodeString &dataString) {
67950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(p->cc!=0) {
68050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
68150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "gennorm2 error: "
68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "U+%04lX combines-forward and has ccc!=0, not possible in Unicode normalization\n",
68350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (long)c);
68450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_INVALID_FORMAT_ERROR);
68550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
68627f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t length;
68727f654740f2a26ad62a5c155af9199af9e69b889claireho    const CompositionPair *pairs=p->getCompositionPairs(length);
68850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(int32_t i=0; i<length; ++i) {
68927f654740f2a26ad62a5c155af9199af9e69b889claireho        const CompositionPair &pair=pairs[i];
69050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // 22 bits for the composite character and whether it combines forward.
69150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 compositeAndFwd=pair.composite<<1;
69250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(getNormRef(pair.composite).compositions!=NULL) {
69350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            compositeAndFwd|=1;  // The composite character also combines-forward.
69450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
69550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Encode most pairs in two units and some in three.
69650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t firstUnit, secondUnit, thirdUnit;
69750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(pair.trail<Normalizer2Impl::COMP_1_TRAIL_LIMIT) {
69850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(compositeAndFwd<=0xffff) {
69950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                firstUnit=pair.trail<<1;
70050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                secondUnit=compositeAndFwd;
70150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                thirdUnit=-1;
70250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
70350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                firstUnit=(pair.trail<<1)|Normalizer2Impl::COMP_1_TRIPLE;
70450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                secondUnit=compositeAndFwd>>16;
70550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                thirdUnit=compositeAndFwd;
70650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
70750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
70850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            firstUnit=(Normalizer2Impl::COMP_1_TRAIL_LIMIT+
70950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                       (pair.trail>>Normalizer2Impl::COMP_1_TRAIL_SHIFT))|
71050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                      Normalizer2Impl::COMP_1_TRIPLE;
71150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            secondUnit=(pair.trail<<Normalizer2Impl::COMP_2_TRAIL_SHIFT)|
71250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                       (compositeAndFwd>>16);
71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            thirdUnit=compositeAndFwd;
71450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
71550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Set the high bit of the first unit if this is the last composition pair.
71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(i==(length-1)) {
71750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            firstUnit|=Normalizer2Impl::COMP_1_LAST_TUPLE;
71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
71950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dataString.append((UChar)firstUnit).append((UChar)secondUnit);
72050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(thirdUnit>=0) {
72150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            dataString.append((UChar)thirdUnit);
72250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
72450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
72550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
72650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass ExtraDataWriter : public Normalizer2DBEnumerator {
72750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ExtraDataWriter(Normalizer2DataBuilder &b) :
72950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        Normalizer2DBEnumerator(b),
73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        yesYesCompositions(1000, (UChar32)0xffff, 2),  // 0=inert, 1=Jamo L, 2=start of compositions
73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        yesNoData(1000, (UChar32)0, 1) {}  // 0=Hangul, 1=start of normal data
73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
73350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(value!=0) {
73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(start!=end) {
73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fprintf(stderr,
73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        "gennorm2 error: unexpected shared data for "
73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        "multiple code points U+%04lX..U+%04lX\n",
73850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (long)start, (long)end);
73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                exit(U_INTERNAL_PROGRAM_ERROR);
74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            builder.writeExtraData(start, value, *this);
74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
74450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
74550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString maybeYesCompositions;
74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString yesYesCompositions;
74750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString yesNoData;
74850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString noNoMappings;
74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Hashtable previousNoNoMappings;  // If constructed in runtime code, pass in UErrorCode.
75050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
75150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
75250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeExtraData(UChar32 c, uint32_t value, ExtraDataWriter &writer) {
75350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Norm *p=norms+value;
75450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(p->combinesBack) {
75550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p->hasMapping()) {
75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fprintf(stderr,
75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "gennorm2 error: "
75850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "U+%04lX combines-back and decomposes, not possible in Unicode normalization\n",
75950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (long)c);
76050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            exit(U_INVALID_FORMAT_ERROR);
76150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
76250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p->compositions!=NULL) {
76350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            p->offset=
76450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (writer.maybeYesCompositions.length()<<Norm::OFFSET_SHIFT)|
76550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                Norm::OFFSET_MAYBE_YES;
76650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            writeCompositions(c, p, writer.maybeYesCompositions);
76750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
76850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if(!p->hasMapping()) {
76950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p->compositions!=NULL) {
77050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            p->offset=
77150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (writer.yesYesCompositions.length()<<Norm::OFFSET_SHIFT)|
77250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                Norm::OFFSET_YES_YES;
77350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            writeCompositions(c, p, writer.yesYesCompositions);
77450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
77550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else if(p->mappingType==Norm::ROUND_TRIP) {
77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        p->offset=
77750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            (writer.yesNoData.length()<<Norm::OFFSET_SHIFT)|
77850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            Norm::OFFSET_YES_NO;
77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        writeMapping(c, p, writer.yesNoData);
78050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p->compositions!=NULL) {
78150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            writeCompositions(c, p, writer.yesNoData);
78250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
78350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else /* one-way */ {
78450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p->compositions!=NULL) {
78550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fprintf(stderr,
78650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "gennorm2 error: "
78750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "U+%04lX combines-forward and has a one-way mapping, "
78850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    "not possible in Unicode normalization\n",
78950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (long)c);
79050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            exit(U_INVALID_FORMAT_ERROR);
79150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
79250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p->cc==0 && optimization!=OPTIMIZE_FAST) {
79350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Try a compact, algorithmic encoding.
79450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Only for ccc=0, because we can't store additional information.
79550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(p->mappingCP>=0) {
79650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t delta=p->mappingCP-c;
79750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(-Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA) {
79850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    p->offset=(delta<<Norm::OFFSET_SHIFT)|Norm::OFFSET_DELTA;
79950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
80050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
80150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
80250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p->offset==0) {
80350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t oldNoNoLength=writer.noNoMappings.length();
80450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            writeMapping(c, p, writer.noNoMappings);
80550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeString newMapping=writer.noNoMappings.tempSubString(oldNoNoLength);
80650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t previousOffset=writer.previousNoNoMappings.geti(newMapping);
80750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(previousOffset!=0) {
80850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Duplicate, remove the new units and point to the old ones.
80950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                writer.noNoMappings.truncate(oldNoNoLength);
81050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                p->offset=
81150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((previousOffset-1)<<Norm::OFFSET_SHIFT)|
81250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Norm::OFFSET_NO_NO;
81350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Enter this new mapping into the hashtable, avoiding value 0 which is "not found".
81550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.puti()");
81650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                writer.previousNoNoMappings.puti(newMapping, oldNoNoLength+1, errorCode);
81750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                p->offset=
81850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (oldNoNoLength<<Norm::OFFSET_SHIFT)|
81950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Norm::OFFSET_NO_NO;
82050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
82150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
82250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
82450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
82550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass Norm16Writer : public Normalizer2DBEnumerator {
82650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
82750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Norm16Writer(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b) {}
82850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
82950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        builder.writeNorm16(start, end, value);
83050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
83150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
83250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
83350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
83450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeNorm16(UChar32 start, UChar32 end, uint32_t value) {
83550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(value!=0) {
83650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const Norm *p=norms+value;
83750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t offset=p->offset>>Norm::OFFSET_SHIFT;
83850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t norm16=0;
83950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UBool isDecompNo=FALSE;
84050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UBool isCompNoMaybe=FALSE;
84150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        switch(p->offset&Norm::OFFSET_MASK) {
84250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case Norm::OFFSET_NONE:
84350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // No mapping, no compositions list.
84450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(p->combinesBack) {
84550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                norm16=Normalizer2Impl::MIN_NORMAL_MAYBE_YES+p->cc;
84650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isDecompNo=(UBool)(p->cc!=0);
84750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isCompNoMaybe=TRUE;
84850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(p->cc!=0) {
84950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                norm16=Normalizer2Impl::MIN_YES_YES_WITH_CC-1+p->cc;
85050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isDecompNo=isCompNoMaybe=TRUE;
85150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
85250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
85350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case Norm::OFFSET_MAYBE_YES:
85450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]+offset;
85550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isCompNoMaybe=TRUE;
85650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
85750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case Norm::OFFSET_YES_YES:
85850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=offset;
85950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
86050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case Norm::OFFSET_YES_NO:
86150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=indexes[Normalizer2Impl::IX_MIN_YES_NO]+offset;
86250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isDecompNo=TRUE;
86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
86450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case Norm::OFFSET_NO_NO:
86550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=indexes[Normalizer2Impl::IX_MIN_NO_NO]+offset;
86650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isDecompNo=isCompNoMaybe=TRUE;
86750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
86850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case Norm::OFFSET_DELTA:
86950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=getCenterNoNoDelta()+offset;
87050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isDecompNo=isCompNoMaybe=TRUE;
87150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
87250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        default:  // Should not occur.
87350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            exit(U_INTERNAL_PROGRAM_ERROR);
87450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
87550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        IcuToolErrorCode errorCode("gennorm2/writeNorm16()");
87650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_setRange32(norm16Trie, start, end, (uint32_t)norm16, TRUE, errorCode);
87750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isDecompNo && start<indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]) {
87850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=start;
87950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
88050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isCompNoMaybe && start<indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]) {
88150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=start;
88250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
88350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
88450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
88550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
88650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::setHangulData() {
88750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    HangulIterator hi;
88850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const HangulIterator::Range *range;
88950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Check that none of the Hangul/Jamo code points have data.
89050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while((range=hi.nextRange())!=NULL) {
89150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(UChar32 c=range->start; c<range->limit; ++c) {
89250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(utrie2_get32(norm16Trie, c)!=0) {
89350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fprintf(stderr,
89450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        "gennorm2 error: "
89550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        "illegal mapping/composition/ccc data for Hangul or Jamo U+%04lX\n",
89650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (long)c);
89750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                exit(U_INVALID_FORMAT_ERROR);
89850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Set data for algorithmic runtime handling.
90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IcuToolErrorCode errorCode("gennorm2/setHangulData()");
90350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    hi.reset();
90450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while((range=hi.nextRange())!=NULL) {
90550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t norm16=range->norm16;
90650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(norm16==0) {
90750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=(uint16_t)indexes[Normalizer2Impl::IX_MIN_YES_NO];  // Hangul LV/LVT encoded as minYesNo
90850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(range->start<indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]) {
90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=range->start;
91050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
91250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(range->start<indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]) {  // Jamo V/T are maybeYes
91350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=range->start;
91450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
91550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
91650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_setRange32(norm16Trie, range->start, range->limit-1, norm16, TRUE, errorCode);
91750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode.assertSuccess();
91850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
91950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
92050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
92150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN
92250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
92350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV
92450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumRangeMaxValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) {
92550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint32_t *pMaxValue=(uint32_t *)context;
92650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(value>*pMaxValue) {
92750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pMaxValue=value;
92850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
92950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
93050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
93150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
93250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END
93350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
93450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::processData() {
93550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IcuToolErrorCode errorCode("gennorm2/processData()");
93650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    norm16Trie=utrie2_open(0, 0, errorCode);
93750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    errorCode.assertSuccess();
93850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
93950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_enum(normTrie, NULL, enumRangeHandler, CompositionBuilder(*this).ptr());
94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Decomposer decomposer(*this);
94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
94350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        decomposer.didDecompose=FALSE;
94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_enum(normTrie, NULL, enumRangeHandler, &decomposer);
94550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(decomposer.didDecompose);
94650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
94750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    BuilderReorderingBuffer buffer;
94850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t normsLength=utm_countItems(normMem);
94950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(int32_t i=1; i<normsLength; ++i) {
95050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(norms[i].hasMapping()) {
95150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer.reset();
95250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reorder(norms+i, buffer);
95350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norms[i].hasNoCompBoundaryAfter=hasNoCompBoundaryAfter(buffer);
95450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
95550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
95650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=0x110000;
95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=0x110000;
95950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
96050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ExtraDataWriter extraDataWriter(*this);
96150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_enum(normTrie, NULL, enumRangeHandler, &extraDataWriter);
96250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
96350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    extraData=extraDataWriter.maybeYesCompositions;
96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    extraData.append(extraDataWriter.yesYesCompositions).
96550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              append(extraDataWriter.yesNoData).
96650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              append(extraDataWriter.noNoMappings);
96750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Pad to even length for 4-byte alignment of following data.
96850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(extraData.length()&1) {
96950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        extraData.append((UChar)0);
97050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
97150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
97250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    indexes[Normalizer2Impl::IX_MIN_YES_NO]=
97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        extraDataWriter.yesYesCompositions.length();
97450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    indexes[Normalizer2Impl::IX_MIN_NO_NO]=
97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        indexes[Normalizer2Impl::IX_MIN_YES_NO]+
97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        extraDataWriter.yesNoData.length();
97750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    indexes[Normalizer2Impl::IX_LIMIT_NO_NO]=
97850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        indexes[Normalizer2Impl::IX_MIN_NO_NO]+
97950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        extraDataWriter.noNoMappings.length();
98050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]=
98150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        Normalizer2Impl::MIN_NORMAL_MAYBE_YES-
98250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        extraDataWriter.maybeYesCompositions.length();
98350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
98450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t minNoNoDelta=getCenterNoNoDelta()-Normalizer2Impl::MAX_DELTA;
98550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(indexes[Normalizer2Impl::IX_LIMIT_NO_NO]>minNoNoDelta) {
98650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr,
98750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "gennorm2 error: "
98850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                "data structure overflow, too much mapping composition data\n");
98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_BUFFER_OVERFLOW_ERROR);
99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
99250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_enum(normTrie, NULL, enumRangeHandler, Norm16Writer(*this).ptr());
99350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
99450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    setHangulData();
99550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
99650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Look for the "worst" norm16 value of any supplementary code point
99750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // corresponding to a lead surrogate, and set it as that surrogate's value.
99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Enables quick check inner loops to look at only code units.
99950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // We could be more sophisticated:
100150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // We could collect a bit set for whether there are values in the different
100250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // norm16 ranges (yesNo, maybeYes, yesYesWithCC etc.)
100350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // and select the best value that only breaks the composition and/or decomposition
100450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // inner loops if necessary.
100550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // However, that seems like overkill for an optimization for supplementary characters.
100650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(UChar lead=0xd800; lead<0xdc00; ++lead) {
100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint32_t maxValue=utrie2_get32(norm16Trie, lead);
100850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_enumForLeadSurrogate(norm16Trie, lead, NULL, enumRangeMaxValue, &maxValue);
100950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if( maxValue>=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO] &&
101050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            maxValue>(uint32_t)indexes[Normalizer2Impl::IX_MIN_NO_NO]
101150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ) {
101250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0.
101350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Otherwise it might end up at something like JAMO_VT which stays in
101450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // the inner decomposition quick check loop.
101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            maxValue=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]-1;
101650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
101750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_set32ForLeadSurrogateCodeUnit(norm16Trie, lead, maxValue, errorCode);
101850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
101950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
102050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Adjust supplementary minimum code points to break quick check loops at their lead surrogates.
102150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // For an empty data file, minCP=0x110000 turns into 0xdc00 (first trail surrogate)
102250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // which is harmless.
102350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // As a result, the minimum code points are always BMP code points.
102450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t minCP=indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP];
102550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(minCP>=0x10000) {
102650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]=U16_LEAD(minCP);
102750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
102850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minCP=indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP];
102950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(minCP>=0x10000) {
103050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]=U16_LEAD(minCP);
103150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
103250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
103350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
103450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2DataBuilder::writeBinaryFile(const char *filename) {
103550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    processData();
103650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
103750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    IcuToolErrorCode errorCode("gennorm2/writeBinaryFile()");
103850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_freeze(norm16Trie, UTRIE2_16_VALUE_BITS, errorCode);
103950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t norm16TrieLength=utrie2_serialize(norm16Trie, NULL, 0, errorCode);
104050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(errorCode.get()!=U_BUFFER_OVERFLOW_ERROR) {
104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "gennorm2 error: unable to freeze/serialize the normalization trie - %s\n",
104250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                errorCode.errorName());
104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(errorCode.reset());
104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
104550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    errorCode.reset();
104650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    LocalArray<uint8_t> norm16TrieBytes(new uint8_t[norm16TrieLength]);
104750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_serialize(norm16Trie, norm16TrieBytes.getAlias(), norm16TrieLength, errorCode);
104850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    errorCode.assertSuccess();
104950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
105050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t offset=(int32_t)sizeof(indexes);
105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]=offset;
105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    offset+=norm16TrieLength;
105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]=offset;
105450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t totalSize=offset+=extraData.length()*2;
105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(int32_t i=Normalizer2Impl::IX_RESERVED2_OFFSET; i<=Normalizer2Impl::IX_TOTAL_SIZE; ++i) {
105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        indexes[i]=totalSize;
105750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(beVerbose) {
106050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("size of normalization trie:         %5ld bytes\n", (long)norm16TrieLength);
106150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("size of 16-bit extra data:          %5ld uint16_t\n", (long)extraData.length());
106250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("size of binary data file contents:  %5ld bytes\n", (long)totalSize);
106350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("minDecompNoCodePoint:              U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_DECOMP_NO_CP]);
106450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("minCompNoMaybeCodePoint:           U+%04lX\n", (long)indexes[Normalizer2Impl::IX_MIN_COMP_NO_MAYBE_CP]);
106550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("minYesNo:                          0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_YES_NO]);
106650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("minNoNo:                           0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_NO_NO]);
106750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("limitNoNo:                         0x%04x\n", (int)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]);
106850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("minMaybeYes:                       0x%04x\n", (int)indexes[Normalizer2Impl::IX_MIN_MAYBE_YES]);
106950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
107050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
107150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    memcpy(dataInfo.dataVersion, unicodeVersion, 4);
107250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UNewDataMemory *pData=
107350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        udata_create(NULL, NULL, filename, &dataInfo,
107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     haveCopyright ? U_COPYRIGHT_STRING : NULL, errorCode);
107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(errorCode.isFailure()) {
107650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "gennorm2 error: unable to create the output file %s - %s\n",
107750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                filename, errorCode.errorName());
107850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(errorCode.reset());
107950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
108050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    udata_writeBlock(pData, indexes, sizeof(indexes));
108150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    udata_writeBlock(pData, norm16TrieBytes.getAlias(), norm16TrieLength);
108250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    udata_writeUString(pData, extraData.getBuffer(), extraData.length());
108350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
108450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t writtenSize=udata_finish(pData, errorCode);
108550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(errorCode.isFailure()) {
108650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "gennorm2: error %s writing the output file\n", errorCode.errorName());
108750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(errorCode.reset());
108850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
108950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(writtenSize!=totalSize) {
109050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "gennorm2 error: written size %ld != calculated size %ld\n",
109150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            (long)writtenSize, (long)totalSize);
109250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(U_INTERNAL_PROGRAM_ERROR);
109350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
109550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
109650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END
109750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
109850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif /* #if !UCONFIG_NO_NORMALIZATION */
109950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
110050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Hey, Emacs, please set the following:
110250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
110350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Local Variables:
110450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * indent-tabs-mode: nil
110550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * End:
110650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
1107