150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*******************************************************************************
350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 2009-2011, International Business Machines
550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   Corporation and others.  All Rights Reserved.
650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*******************************************************************************
850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   file name:  normalizer2impl.cpp
950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   encoding:   US-ASCII
1050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   tab size:   8 (not used)
1150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   indentation:4
1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   created on: 2009nov22
1450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   created by: Markus W. Scherer
1550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/
1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h"
1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/normalizer2.h"
2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/udata.h"
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h"
2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "cmemory.h"
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "mutex.h"
2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h"
2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uassert.h"
2827f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uhash.h"
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uset_imp.h"
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "utrie2.h"
3127f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uvector.h"
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// ReorderingBuffer -------------------------------------------------------- ***
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) {
3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=str.length();
3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    start=str.getBuffer(destCapacity);
4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(start==NULL) {
4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // getBuffer() already did str.setToBogus()
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode=U_MEMORY_ALLOCATION_ERROR;
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    limit=start+length;
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity=str.getCapacity()-length;
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=start;
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(start==limit) {
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lastCC=0;
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        setIterator();
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lastCC=previousCC();
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Set reorderStart after the last code point with cc<=1 if there is one.
5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(lastCC>1) {
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            while(previousCC()>1) {}
5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reorderStart=codePointLimit;
5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const {
6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=(int32_t)(limit-start);
6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return
6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length==(int32_t)(otherLimit-otherStart) &&
6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        0==u_memcmp(start, otherStart, length);
6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(remainingCapacity<2 && !resize(2, errorCode)) {
7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(lastCC<=cc || cc==0) {
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit[0]=U16_LEAD(c);
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit[1]=U16_TRAIL(c);
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit+=2;
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lastCC=cc;
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(cc<=1) {
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reorderStart=limit;
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        insert(c, cc);
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity-=2;
8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::append(const UChar *s, int32_t length,
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                               uint8_t leadCC, uint8_t trailCC,
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                               UErrorCode &errorCode) {
9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length==0) {
9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(remainingCapacity<length && !resize(length, errorCode)) {
9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity-=length;
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(lastCC<=leadCC || leadCC==0) {
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(trailCC<=1) {
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reorderStart=limit+length;
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(leadCC<=1) {
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reorderStart=limit+1;  // Ok if not a code point boundary.
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *sLimit=s+length;
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        do { *limit++=*s++; } while(s!=sLimit);
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lastCC=trailCC;
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t i=0;
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U16_NEXT(s, i, length, c);
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        insert(c, leadCC);  // insert first code point
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(i<length) {
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(s, i, length, c);
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(i<length) {
11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // s must be in NFD, otherwise we need to use getCC().
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                leadCC=trailCC;
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            append(c, leadCC, errorCode);
12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) {
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t cpLength=U16_LENGTH(c);
12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) {
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity-=cpLength;
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(cpLength==1) {
13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *limit++=(UChar)c;
13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit[0]=U16_LEAD(c);
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit[1]=U16_TRAIL(c);
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit+=2;
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    lastCC=0;
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=limit;
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode) {
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(s==sLimit) {
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=(int32_t)(sLimit-s);
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(remainingCapacity<length && !resize(length, errorCode)) {
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_memcpy(limit, s, length);
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    limit+=length;
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity-=length;
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    lastCC=0;
15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=limit;
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::remove() {
16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=limit=start;
16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity=str.getCapacity();
16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    lastCC=0;
16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
16650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::removeSuffix(int32_t suffixLength) {
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(suffixLength<(limit-start)) {
16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit-=suffixLength;
16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        remainingCapacity+=suffixLength;
17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit=start;
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        remainingCapacity=str.getCapacity();
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    lastCC=0;
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=limit;
17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) {
17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t reorderStartIndex=(int32_t)(reorderStart-start);
18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=(int32_t)(limit-start);
18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    str.releaseBuffer(length);
18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t newCapacity=length+appendLength;
18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t doubleCapacity=2*str.getCapacity();
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(newCapacity<doubleCapacity) {
18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        newCapacity=doubleCapacity;
18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(newCapacity<256) {
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        newCapacity=256;
18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    start=str.getBuffer(newCapacity);
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(start==NULL) {
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // getBuffer() already did str.setToBogus()
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode=U_MEMORY_ALLOCATION_ERROR;
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=start+reorderStartIndex;
19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    limit=start+length;
19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity=str.getCapacity()-length;
19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
20250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::skipPrevious() {
20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    codePointLimit=codePointStart;
20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar c=*--codePointStart;
20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) {
20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        --codePointStart;
20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
21050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouint8_t ReorderingBuffer::previousCC() {
21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    codePointLimit=codePointStart;
21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(reorderStart>=codePointStart) {
21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=*--codePointStart;
21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(c<Normalizer2Impl::MIN_CCC_LCCC_CP) {
21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar c2;
22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) {
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        --codePointStart;
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c=U16_GET_SUPPLEMENTARY(c2, c);
22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Inserts c somewhere before the last character.
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Requires 0<cc<lastCC which implies reorderStart<limit.
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::insert(UChar32 c, uint8_t cc) {
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(setIterator(), skipPrevious(); previousCC()>cc;) {}
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // insert c at codePointLimit, after the character with prevCC<=cc
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *q=limit;
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *r=limit+=U16_LENGTH(c);
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *--r=*--q;
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(codePointLimit!=q);
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    writeCodePoint(q, c);
23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(cc<=1) {
24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reorderStart=r;
24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Normalizer2Impl --------------------------------------------------------- ***
24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
24627f654740f2a26ad62a5c155af9199af9e69b889clairehostruct CanonIterData : public UMemory {
24727f654740f2a26ad62a5c155af9199af9e69b889claireho    CanonIterData(UErrorCode &errorCode);
24827f654740f2a26ad62a5c155af9199af9e69b889claireho    ~CanonIterData();
24927f654740f2a26ad62a5c155af9199af9e69b889claireho    void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
25027f654740f2a26ad62a5c155af9199af9e69b889claireho    UTrie2 *trie;
25127f654740f2a26ad62a5c155af9199af9e69b889claireho    UVector canonStartSets;  // contains UnicodeSet *
25227f654740f2a26ad62a5c155af9199af9e69b889claireho};
25327f654740f2a26ad62a5c155af9199af9e69b889claireho
25450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::~Normalizer2Impl() {
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    udata_close(memory);
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_close(normTrie);
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UTrie2Singleton(fcdTrieSingleton).deleteInstance();
25827f654740f2a26ad62a5c155af9199af9e69b889claireho    delete (CanonIterData *)canonIterDataSingleton.fInstance;
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool U_CALLCONV
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::isAcceptable(void *context,
26327f654740f2a26ad62a5c155af9199af9e69b889claireho                              const char * /* type */, const char * /*name*/,
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                              const UDataInfo *pInfo) {
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->size>=20 &&
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->charsetFamily==U_CHARSET_FAMILY &&
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[1]==0x72 &&
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[2]==0x6d &&
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[3]==0x32 &&
27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->formatVersion[0]==1
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        Normalizer2Impl *me=(Normalizer2Impl *)context;
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(errorCode)) {
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(errorCode)) {
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const int32_t *inIndexes=(const int32_t *)inBytes;
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(indexesLength<=IX_MIN_MAYBE_YES) {
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minYesNo=inIndexes[IX_MIN_YES_NO];
30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minNoNo=inIndexes[IX_MIN_NO_NO];
30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    limitNoNo=inIndexes[IX_LIMIT_NO_NO];
30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       inBytes+offset, nextOffset-offset, NULL,
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       &errorCode);
31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(errorCode)) {
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    offset=nextOffset;
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    maybeYesCompositions=(const uint16_t *)(inBytes+offset);
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouint8_t Normalizer2Impl::getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const {
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c;
32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(cpStart==(cpLimit-1)) {
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c=*cpStart;
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]);
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t prevNorm16=getNorm16(c);
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(prevNorm16<=minYesNo) {
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;  // yesYes and Hangul LV/LVT have ccc=tccc=0
33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return (uint8_t)(*getMapping(prevNorm16)>>8);  // tccc from yesNo
33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
33950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV
34050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* add the start code point to the USet */
34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const USetAdder *sa=(const USetAdder *)context;
34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    sa->add(sa->set, start);
34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
34727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic uint32_t U_CALLCONV
34827f654740f2a26ad62a5c155af9199af9e69b889clairehosegmentStarterMapper(const void * /*context*/, uint32_t value) {
34927f654740f2a26ad62a5c155af9199af9e69b889claireho    return value&CANON_NOT_SEGMENT_STARTER;
35027f654740f2a26ad62a5c155af9199af9e69b889claireho}
35127f654740f2a26ad62a5c155af9199af9e69b889claireho
35250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END
35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid
35527f654740f2a26ad62a5c155af9199af9e69b889clairehoNormalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* add the start code point of each same-value range of each trie */
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_enum(normTrie, NULL, enumPropertyStartsRange, sa);
35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* add Hangul LV syllables and LV+1 because of skippables */
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sa->add(sa->set, c);
36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sa->add(sa->set, c+1);
36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
36727f654740f2a26ad62a5c155af9199af9e69b889clairehovoid
36827f654740f2a26ad62a5c155af9199af9e69b889clairehoNormalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
36927f654740f2a26ad62a5c155af9199af9e69b889claireho    /* add the start code point of each same-value range of the canonical iterator data trie */
37027f654740f2a26ad62a5c155af9199af9e69b889claireho    if(ensureCanonIterData(errorCode)) {
37127f654740f2a26ad62a5c155af9199af9e69b889claireho        // currently only used for the SEGMENT_STARTER property
37227f654740f2a26ad62a5c155af9199af9e69b889claireho        utrie2_enum(((CanonIterData *)canonIterDataSingleton.fInstance)->trie,
37327f654740f2a26ad62a5c155af9199af9e69b889claireho                    segmentStarterMapper, enumPropertyStartsRange, sa);
37427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
37527f654740f2a26ad62a5c155af9199af9e69b889claireho}
37627f654740f2a26ad62a5c155af9199af9e69b889claireho
37750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
37850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src,
37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                UChar32 minNeedDataCP,
38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                ReorderingBuffer *buffer,
38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                UErrorCode &errorCode) const {
38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Make some effort to support NUL-terminated strings reasonably.
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Take the part of the fast quick check loop that does not look up
38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // data and check the first part of the string.
38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // After this prefix, determine the string length to simplify the rest
38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // of the code.
38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc=src;
38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar c;
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while((c=*src++)<minNeedDataCP && c!=0) {}
39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Back out the last character for full processing.
39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Copy this prefix.
39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(--src!=prevSrc) {
39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(buffer!=NULL) {
39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer->appendZeroCC(prevSrc, src, errorCode);
39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return src;
39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Dual functionality:
40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer!=NULL: normalize
40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer==NULL: isNormalized/spanQuickCheckYes
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
40450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::decompose(const UChar *src, const UChar *limit,
40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           ReorderingBuffer *buffer,
40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           UErrorCode &errorCode) const {
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 minNoCP=minDecompNoCP;
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(limit==NULL) {
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(U_FAILURE(errorCode)) {
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return src;
41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit=u_strchr(src, 0);
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc;
41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=0;
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16=0;
41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // only for quick check
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevBoundary=src;
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t prevCC=0;
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // count code units below the minimum or with irrelevant data for the quick check
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(prevSrc=src; src!=limit;) {
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( (c=*src)<minNoCP ||
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(!U16_IS_SURROGATE(c)) {
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c2;
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_SURROGATE_LEAD(c)) {
43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c, c2);
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else /* trail surrogate */ {
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --src;
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c2, c);
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    src+=U16_LENGTH(c);
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // copy these code units all at once
45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=prevSrc) {
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(buffer!=NULL) {
45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {
45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC=0;
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary=src;
46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src==limit) {
46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Check one above-minimum, relevant code point.
46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src+=U16_LENGTH(c);
46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(buffer!=NULL) {
47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!decompose(c, norm16, *buffer, errorCode)) {
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(isDecompYes(norm16)) {
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                uint8_t cc=getCCFromYesOrMaybe(norm16);
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(prevCC<=cc || cc==0) {
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevCC=cc;
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(cc<=1) {
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        prevBoundary=src;
48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return prevBoundary;  // "no" or cc out of order
48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return src;
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Decompose a short piece of text which is likely to contain characters that
49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// fail the quick check loop and/or where the quick check loop's overhead
49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// is unlikely to be amortized.
49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Called by the compose() and makeFCD() implementations.
49450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,
49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                      ReorderingBuffer &buffer,
49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                      UErrorCode &errorCode) const {
49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(src<limit) {
49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
49950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t norm16;
50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(!decompose(c, norm16, buffer, errorCode)) {
50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
50650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
50850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                 ReorderingBuffer &buffer,
51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                 UErrorCode &errorCode) const {
51150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Only loops for 1:1 algorithmic mappings.
51250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
51350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // get the decomposition and the lead and trail cc's
51450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isDecompYes(norm16)) {
51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c does not decompose
51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode);
51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isHangul(norm16)) {
51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Hangul syllable: decompose algorithmically
51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar jamos[3];
52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode);
52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=getNorm16(c);
52450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint16_t firstUnit=*mapping++;
52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t length=firstUnit&MAPPING_LENGTH_MASK;
52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint8_t leadCC, trailCC;
53050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            trailCC=(uint8_t)(firstUnit>>8);
53150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                leadCC=(uint8_t)(*mapping++>>8);
53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                leadCC=0;
53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return buffer.append((const UChar *)mapping, length, leadCC, trailCC, errorCode);
53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
54150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
54250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const {
54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *decomp=NULL;
54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16;
54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c does not decompose
54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return decomp;
54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isHangul(norm16)) {
55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Hangul syllable: decompose algorithmically
55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            length=Hangul::decompose(c, buffer);
55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return buffer;
55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            decomp=buffer;
55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            length=0;
55750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_APPEND_UNSAFE(buffer, length, c);
55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
55950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
56050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
56150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint16_t firstUnit=*mapping++;
56250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            length=firstUnit&MAPPING_LENGTH_MASK;
56350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++mapping;
56550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return (const UChar *)mapping;
56750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
57150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                         UBool doDecompose,
573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                         UnicodeString &safeMiddle,
57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                         ReorderingBuffer &buffer,
57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                         UErrorCode &errorCode) const {
576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    buffer.copyReorderableSuffixTo(safeMiddle);
57750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(doDecompose) {
57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        decompose(src, limit, &buffer, errorCode);
57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Just merge the strings at the boundary.
58250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ForwardUTrie2StringIterator iter(normTrie, src, limit);
58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t firstCC, prevCC, cc;
58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    firstCC=prevCC=cc=getCC(iter.next16());
58550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(cc!=0) {
58650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        prevCC=cc;
58750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cc=getCC(iter.next16());
58850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
589b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
590b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        limit=u_strchr(iter.codePointStart, 0);
591b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode) &&
59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        buffer.appendZeroCC(iter.codePointStart, limit, errorCode);
59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
59550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Note: hasDecompBoundary() could be implemented as aliases to
59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// at the cost of building the FCD trie for a decomposition normalizer.
59950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasDecompBoundary(UChar32 c, UBool before) const {
60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(c<minDecompNoCP) {
60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t norm16=getNorm16(c);
60550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
60750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(norm16>MIN_NORMAL_MAYBE_YES) {
60850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;  // ccc!=0
60950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
61050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
61150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
61250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
61350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
61450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint16_t firstUnit=*mapping++;
61550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
61650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
61750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
61850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!before) {
61950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // decomp after-boundary: same as hasFCDBoundaryAfter(),
62050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // fcd16<=1 || trailCC==0
62150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(firstUnit>0x1ff) {
62250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;  // trailCC>1
62350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
62450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(firstUnit<=0xff) {
62550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return TRUE;  // trailCC==0
62650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
62750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // if(trailCC==1) test leadCC==0, same as checking for before-boundary
62850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
62950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // TRUE if leadCC==0 (hasFCDBoundaryBefore())
63050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*mapping&0xff00)==0;
63150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
63250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
63350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
63450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
63550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
63650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Finds the recomposition result for
63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a forward-combining "lead" character,
63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * specified with a pointer to its compositions list,
63950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and a backward-combining "trail" character.
64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
64150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the lead and trail characters combine, then this function returns
64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the following "compositeAndFwd" value:
64350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Bits 21..1  composite character
64450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Bit      0  set if the composite is a forward-combining starter
64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * otherwise it returns -1.
64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The compositions list has (trail, compositeAndFwd) pair entries,
64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * encoded as either pairs or triples of 16-bit units.
64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The last entry has the high bit of its first unit set.
65050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
65150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The list is sorted by ascending trail characters (there are no duplicates).
65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * A linear search is used.
65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * See normalizer2impl.h for a more detailed description
65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * of the compositions list format.
65650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
65750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t key1, firstUnit;
65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(trail<COMP_1_TRAIL_LIMIT) {
66050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // trail character is 0..33FF
66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // result entry may have 2 or 3 units
66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        key1=(uint16_t)(trail<<1);
66350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(key1>(firstUnit=*list)) {
66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            list+=2+(firstUnit&COMP_1_TRIPLE);
66550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(firstUnit&COMP_1_TRIPLE) {
66850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return ((int32_t)list[1]<<16)|list[2];
66950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
67050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return list[1];
67150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
67350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
67450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // trail character is 3400..10FFFF
67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // result entry has 3 units
67650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
67727f654740f2a26ad62a5c155af9199af9e69b889claireho                        (((trail>>COMP_1_TRAIL_SHIFT))&
67827f654740f2a26ad62a5c155af9199af9e69b889claireho                          ~COMP_1_TRIPLE));
67950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
68050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t secondUnit;
68150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(;;) {
68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(key1>(firstUnit=*list)) {
68350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                list+=2+(firstUnit&COMP_1_TRIPLE);
68450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
68550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(key2>(secondUnit=list[1])) {
68650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(firstUnit&COMP_1_LAST_TUPLE) {
68750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
68850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
68950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        list+=3;
69050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
69150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
69250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2];
69350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
69450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
69550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
69650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
69750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
69850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
69950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
70050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
70150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return -1;
70250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
70350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
70427f654740f2a26ad62a5c155af9199af9e69b889claireho/**
70527f654740f2a26ad62a5c155af9199af9e69b889claireho  * @param list some character's compositions list
70627f654740f2a26ad62a5c155af9199af9e69b889claireho  * @param set recursively receives the composites from these compositions
70727f654740f2a26ad62a5c155af9199af9e69b889claireho  */
70827f654740f2a26ad62a5c155af9199af9e69b889clairehovoid Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const {
70927f654740f2a26ad62a5c155af9199af9e69b889claireho    uint16_t firstUnit;
71027f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t compositeAndFwd;
71127f654740f2a26ad62a5c155af9199af9e69b889claireho    do {
71227f654740f2a26ad62a5c155af9199af9e69b889claireho        firstUnit=*list;
71327f654740f2a26ad62a5c155af9199af9e69b889claireho        if((firstUnit&COMP_1_TRIPLE)==0) {
71427f654740f2a26ad62a5c155af9199af9e69b889claireho            compositeAndFwd=list[1];
71527f654740f2a26ad62a5c155af9199af9e69b889claireho            list+=2;
71627f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
71727f654740f2a26ad62a5c155af9199af9e69b889claireho            compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2];
71827f654740f2a26ad62a5c155af9199af9e69b889claireho            list+=3;
71927f654740f2a26ad62a5c155af9199af9e69b889claireho        }
72027f654740f2a26ad62a5c155af9199af9e69b889claireho        UChar32 composite=compositeAndFwd>>1;
72127f654740f2a26ad62a5c155af9199af9e69b889claireho        if((compositeAndFwd&1)!=0) {
72227f654740f2a26ad62a5c155af9199af9e69b889claireho            addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
72327f654740f2a26ad62a5c155af9199af9e69b889claireho        }
72427f654740f2a26ad62a5c155af9199af9e69b889claireho        set.add(composite);
72527f654740f2a26ad62a5c155af9199af9e69b889claireho    } while((firstUnit&COMP_1_LAST_TUPLE)==0);
72627f654740f2a26ad62a5c155af9199af9e69b889claireho}
72727f654740f2a26ad62a5c155af9199af9e69b889claireho
72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
72950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Recomposes the buffer text starting at recomposeStartIndex
73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (which is in NFD - decomposed and canonically ordered),
73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and truncates the buffer contents.
73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
73350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that recomposition never lengthens the text:
73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Any character consists of either one or two code units;
73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a composition may contain at most one more code unit than the original starter,
73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * while the combining mark that is removed has at least one code unit.
73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
73850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                UBool onlyContiguous) const {
74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *p=buffer.getStart()+recomposeStartIndex;
74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *limit=buffer.getLimit();
74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(p==limit) {
74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
74450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
74550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *starter, *pRemove, *q, *r;
74750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint16_t *compositionsList;
74850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c, compositeAndFwd;
74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16;
75050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t cc, prevCC;
75150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool starterIsSupplementary;
75250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
75350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Some of the following variables are not used until we have a forward-combining starter
75450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // and are only initialized now to avoid compiler warnings.
75550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    compositionsList=NULL;  // used as indicator for whether we have a forward-combining starter
75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    starter=NULL;
75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    starterIsSupplementary=FALSE;
75850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    prevCC=0;
75950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
76050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
76150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
76250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cc=getCCFromYesOrMaybe(norm16);
76350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if( // this character combines backward and
76450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMaybe(norm16) &&
76550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // we have seen a starter that combines forward and
76650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            compositionsList!=NULL &&
76750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // the backward-combining character is not blocked
76850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            (prevCC<cc || prevCC==0)
76950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ) {
77050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(isJamoVT(norm16)) {
77150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // c is a Jamo V/T, see if we can compose it with the previous character.
77250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(c<Hangul::JAMO_T_BASE) {
77350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
77450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE);
77550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prev<Hangul::JAMO_L_COUNT) {
77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        pRemove=p-1;
77750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar syllable=(UChar)
77850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            (Hangul::HANGUL_BASE+
77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
78050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             Hangul::JAMO_T_COUNT);
78150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar t;
78250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
78350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            ++p;
78450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            syllable+=t;  // The next character was a Jamo T.
78550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
78650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *starter=syllable;
78750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // remove the Jamo V/T
78850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        q=pRemove;
78950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        r=p;
79050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        while(r<limit) {
79150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            *q++=*r++;
79250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
79350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        limit=q;
79450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        p=pRemove;
79550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
79650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
79750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                /*
79850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 * No "else" for Jamo T:
79950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 * Since the input is in NFD, there are no Hangul LV syllables that
80050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 * a Jamo T could combine with.
80150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 * All Jamo Ts are combined above when handling Jamo Vs.
80250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 */
80350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(p==limit) {
80450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
80550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
80650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                compositionsList=NULL;
80750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
80850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if((compositeAndFwd=combine(compositionsList, c))>=0) {
80950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The starter and the combining mark (c) do combine.
81050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 composite=compositeAndFwd>>1;
81150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
81250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Replace the starter with the composite, remove the combining mark.
81350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                pRemove=p-U16_LENGTH(c);  // pRemove & p: start & limit of the combining mark
81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(starterIsSupplementary) {
81550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(U_IS_SUPPLEMENTARY(composite)) {
81650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // both are supplementary
81750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        starter[0]=U16_LEAD(composite);
81850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        starter[1]=U16_TRAIL(composite);
81950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
82050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *starter=(UChar)composite;
82150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The composite is shorter than the starter,
82250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // move the intermediate characters forward one.
82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        starterIsSupplementary=FALSE;
82450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        q=starter+1;
82550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        r=q+1;
82650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        while(r<pRemove) {
82750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            *q++=*r++;
82850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
82950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --pRemove;
83050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
83150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else if(U_IS_SUPPLEMENTARY(composite)) {
83250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The composite is longer than the starter,
83350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // move the intermediate characters back one.
83450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starterIsSupplementary=TRUE;
83550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ++starter;  // temporarily increment for the loop boundary
83650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    q=pRemove;
83750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    r=++pRemove;
83850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while(starter<q) {
83950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *--r=*--q;
84050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
84150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *starter=U16_TRAIL(composite);
84250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *--starter=U16_LEAD(composite);  // undo the temporary increment
84350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
84450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // both are on the BMP
84550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *starter=(UChar)composite;
84650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
84750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
84850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                /* remove the combining mark by moving the following text over it */
84950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(pRemove<p) {
85050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    q=pRemove;
85150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    r=p;
85250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while(r<limit) {
85350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *q++=*r++;
85450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
85550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    limit=q;
85650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    p=pRemove;
85750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
85850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Keep prevCC because we removed the combining mark.
85950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
86050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(p==limit) {
86150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
86250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Is the composite a starter that combines forward?
86450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(compositeAndFwd&1) {
86550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    compositionsList=
86650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        getCompositionsListForComposite(getNorm16(composite));
86750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
86850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    compositionsList=NULL;
86950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
87050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
87150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // We combined; continue with looking for compositions.
87250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
87350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
87450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
87550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
87650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // no combination this time
87750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        prevCC=cc;
87850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p==limit) {
87950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
88050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
88150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
88250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // If c did not combine, then check if it is a starter.
88350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(cc==0) {
88450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Found a new starter.
88550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) {
88650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // It may combine with something, prepare for it.
88750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U_IS_BMP(c)) {
88850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starterIsSupplementary=FALSE;
88950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starter=p-1;
89050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
89150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starterIsSupplementary=TRUE;
89250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starter=p-2;
89350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
89450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
89550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(onlyContiguous) {
89650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // FCC: no discontiguous compositions; any intervening character blocks.
89750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            compositionsList=NULL;
89850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    buffer.setReorderingLimit(limit);
90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
90350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
90450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// doCompose: normalize
90550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// !doCompose: isNormalized (buffer must be empty and initialized)
90650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool
90750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::compose(const UChar *src, const UChar *limit,
90850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UBool onlyContiguous,
90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UBool doCompose,
91050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         ReorderingBuffer &buffer,
91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UErrorCode &errorCode) const {
91250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
91350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * prevBoundary points to the last character before the current one
91450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * that has a composition boundary before it with ccc==0 and quick check "yes".
91550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Keeping track of prevBoundary saves us looking for a composition boundary
91650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * when we find a "no" or "maybe".
91750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
91850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * When we back out from prevSrc back to prevBoundary,
91950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * then we also remove those same characters (which had been simply copied
92050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * or canonically-order-inserted) from the ReorderingBuffer.
92150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Therefore, at all times, the [prevBoundary..prevSrc[ source units
92250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * must correspond 1:1 to destination units at the end of the destination buffer.
92350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
92450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevBoundary=src;
92527f654740f2a26ad62a5c155af9199af9e69b889claireho    UChar32 minNoMaybeCP=minCompNoMaybeCP;
92627f654740f2a26ad62a5c155af9199af9e69b889claireho    if(limit==NULL) {
92727f654740f2a26ad62a5c155af9199af9e69b889claireho        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
92827f654740f2a26ad62a5c155af9199af9e69b889claireho                                           doCompose ? &buffer : NULL,
92927f654740f2a26ad62a5c155af9199af9e69b889claireho                                           errorCode);
93027f654740f2a26ad62a5c155af9199af9e69b889claireho        if(U_FAILURE(errorCode)) {
93127f654740f2a26ad62a5c155af9199af9e69b889claireho            return FALSE;
93227f654740f2a26ad62a5c155af9199af9e69b889claireho        }
93327f654740f2a26ad62a5c155af9199af9e69b889claireho        if(prevBoundary<src) {
93427f654740f2a26ad62a5c155af9199af9e69b889claireho            // Set prevBoundary to the last character in the prefix.
93527f654740f2a26ad62a5c155af9199af9e69b889claireho            prevBoundary=src-1;
93627f654740f2a26ad62a5c155af9199af9e69b889claireho        }
93727f654740f2a26ad62a5c155af9199af9e69b889claireho        limit=u_strchr(src, 0);
93827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
93927f654740f2a26ad62a5c155af9199af9e69b889claireho
94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc;
94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=0;
94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16=0;
94350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // only for isNormalized
94550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t prevCC=0;
94650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
94750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
94850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // count code units below the minimum or with irrelevant data for the quick check
94950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(prevSrc=src; src!=limit;) {
95050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( (c=*src)<minNoMaybeCP ||
95150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
95250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
95350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
95450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(!U16_IS_SURROGATE(c)) {
95550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
95650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c2;
95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_SURROGATE_LEAD(c)) {
95950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
96050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c, c2);
96150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
96250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else /* trail surrogate */ {
96350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --src;
96550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c2, c);
96650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
96750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
96850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
96950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    src+=U16_LENGTH(c);
97050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
97150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
97250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
97450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // copy these code units all at once
97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=prevSrc) {
97750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(doCompose) {
97850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!buffer.appendZeroCC(prevSrc, src, errorCode)) {
97950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
98050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
98150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
98250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC=0;
98350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
98450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(src==limit) {
98550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
98650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
98750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Set prevBoundary to the last character in the quick check loop.
98850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=src-1;
98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_IS_LEAD(*(prevBoundary-1))
99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
99250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                --prevBoundary;
99350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
99450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // The start of the current character (c).
99550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevSrc=src;
99650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(src==limit) {
99750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
99950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src+=U16_LENGTH(c);
100150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
100250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
100350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
100450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * or has ccc!=0.
100550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Check for Jamo V/T, then for regular characters.
100650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * c is not a Hangul syllable or Jamo L because those have "yes" properties.
100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
100850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
100950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar prev=*(prevSrc-1);
101050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UBool needToDecompose=FALSE;
101150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(c<Hangul::JAMO_T_BASE) {
101250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
101350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prev=(UChar)(prev-Hangul::JAMO_L_BASE);
101450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(prev<Hangul::JAMO_L_COUNT) {
101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(!doCompose) {
101650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return FALSE;
101750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
101850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar syllable=(UChar)
101950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (Hangul::HANGUL_BASE+
102050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
102150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         Hangul::JAMO_T_COUNT);
102250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar t;
102350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(src!=limit && (t=(UChar)(*src-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
102450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        ++src;
102550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        syllable+=t;  // The next character was a Jamo T.
102650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        prevBoundary=src;
102750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        buffer.setLastChar(syllable);
102850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        continue;
102950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
103050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // If we see L+V+x where x!=T then we drop to the slow path,
103150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // decompose and recompose.
103250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // This is to deal with NFKC finding normal L and V but a
103350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // compatibility variant of a T. We need to either fully compose that
103450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // combination here (which would complicate the code and may not work
103550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // with strange custom data) or use the slow path -- or else our replacing
103650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // two input characters (L+V) with one output character (LV syllable)
103750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // would violate the invariant that [prevBoundary..prevSrc[ has the same
103850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // length as what we appended to the buffer since prevBoundary.
103950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    needToDecompose=TRUE;
104050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(Hangul::isHangulWithoutJamoT(prev)) {
104250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // c is a Jamo Trailing consonant,
104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // compose with previous Hangul LV that does not contain a Jamo T.
104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!doCompose) {
104550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
104650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
104750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                buffer.setLastChar((UChar)(prev+c-Hangul::JAMO_T_BASE));
104850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary=src;
104950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
105050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!needToDecompose) {
105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The Jamo V/T did not compose into a Hangul syllable.
105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(doCompose) {
105450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(!buffer.appendBMP((UChar)c, 0, errorCode)) {
105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
105750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevCC=0;
105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
106050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
106150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
106250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
106350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
106450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Source buffer pointers:
106550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
106650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *  all done      quick check   current char  not yet
106750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                "yes" but     (c)           processed
106850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                may combine
106950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                forward
107050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * [-------------[-------------[-------------[-------------[
107150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * |             |             |             |             |
107250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * orig. src     prevBoundary  prevSrc       src           limit
107350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Destination buffer pointers inside the ReorderingBuffer:
107650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
107750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *  all done      might take    not filled yet
107850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                characters for
107950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                reordering
108050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * [-------------[-------------[-------------[
108150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * |             |             |             |
108250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * start         reorderStart  limit         |
108350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                             +remainingCap.+
108450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
108550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(norm16>=MIN_YES_YES_WITH_CC) {
108650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint8_t cc=(uint8_t)norm16;  // cc!=0
108750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( onlyContiguous &&  // FCC
108850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (doCompose ? buffer.getLastCC() : prevCC)==0 &&
108950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary<prevSrc &&
109050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
109150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
109250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // passed the quick check "yes && ccc==0" test.
109350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Check whether the last character was a "yesYes" or a "yesNo".
109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If a "yesNo", then we get its trailing ccc from its
109550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // mapping and check for canonical order.
109650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // All other cases are ok.
109750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
109850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
109950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fails FCD test, need to decompose and contiguously recompose.
110050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!doCompose) {
110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
110250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
110350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(doCompose) {
110450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!buffer.append(c, cc, errorCode)) {
110550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
110650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
110750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
110850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(prevCC<=cc) {
110950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC=cc;
111050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
111150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
111250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
111350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
111450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
111550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
111650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
111750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
111850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
111950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Find appropriate boundaries around this character,
112050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * decompose the source text from between the boundaries,
112150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * and recompose it.
112250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
112350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * We may need to remove the last few characters from the ReorderingBuffer
112450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * to account for source text that was copied or appended
112550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * but needs to take part in the recomposition.
112650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
112750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
112850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
112950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Find the last composition boundary in [prevBoundary..src[.
113050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * It is either the decomposition of the current character (at prevSrc),
113150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * or prevBoundary.
113250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
113350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(hasCompBoundaryBefore(c, norm16)) {
113450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=prevSrc;
113550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(doCompose) {
113650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer.removeSuffix((int32_t)(prevSrc-prevBoundary));
113750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
113850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
113950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Find the next composition boundary in [src..limit[ -
114050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // modifies src to point to the next starter.
114150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src=(UChar *)findNextCompBoundary(src, limit);
114250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
114350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
114450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t recomposeStartIndex=buffer.length();
114550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(!decomposeShort(prevBoundary, src, buffer, errorCode)) {
114650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
114750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
114850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        recompose(buffer, recomposeStartIndex, onlyContiguous);
114950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(!doCompose) {
115050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!buffer.equals(prevBoundary, src)) {
115150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
115250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
115350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer.remove();
115450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCC=0;
115550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
115650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
115750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Move to the next starter. We never need to look back before this point again.
115850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        prevBoundary=src;
115950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
116050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
116150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
116250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
116350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Very similar to compose(): Make the same changes in both places if relevant.
116450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// pQCResult==NULL: spanQuickCheckYes
116550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES)
116650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
116750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
116850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                   UBool onlyContiguous,
116950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                   UNormalizationCheckResult *pQCResult) const {
117027f654740f2a26ad62a5c155af9199af9e69b889claireho    /*
117127f654740f2a26ad62a5c155af9199af9e69b889claireho     * prevBoundary points to the last character before the current one
117227f654740f2a26ad62a5c155af9199af9e69b889claireho     * that has a composition boundary before it with ccc==0 and quick check "yes".
117327f654740f2a26ad62a5c155af9199af9e69b889claireho     */
117427f654740f2a26ad62a5c155af9199af9e69b889claireho    const UChar *prevBoundary=src;
117550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 minNoMaybeCP=minCompNoMaybeCP;
117650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(limit==NULL) {
117750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode errorCode=U_ZERO_ERROR;
117850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode);
117927f654740f2a26ad62a5c155af9199af9e69b889claireho        if(prevBoundary<src) {
118027f654740f2a26ad62a5c155af9199af9e69b889claireho            // Set prevBoundary to the last character in the prefix.
118127f654740f2a26ad62a5c155af9199af9e69b889claireho            prevBoundary=src-1;
118227f654740f2a26ad62a5c155af9199af9e69b889claireho        }
118350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit=u_strchr(src, 0);
118450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
118550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
118650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc;
118750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=0;
118850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16=0;
118950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t prevCC=0;
119050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
119150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
119250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // count code units below the minimum or with irrelevant data for the quick check
119350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(prevSrc=src;;) {
119450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(src==limit) {
119550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return src;
119650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
119750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( (c=*src)<minNoMaybeCP ||
119850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
119950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
120050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
120150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(!U16_IS_SURROGATE(c)) {
120250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
120350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
120450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c2;
120550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_SURROGATE_LEAD(c)) {
120650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
120750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c, c2);
120850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
120950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else /* trail surrogate */ {
121050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
121150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --src;
121250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c2, c);
121350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
121450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
121550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
121650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    src+=U16_LENGTH(c);
121750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
121850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
121950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
122050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
122150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
122250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=prevSrc) {
122350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Set prevBoundary to the last character in the quick check loop.
122450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=src-1;
122550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
122650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_IS_LEAD(*(prevBoundary-1))
122750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
122850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                --prevBoundary;
122950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
123050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCC=0;
123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // The start of the current character (c).
123250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevSrc=src;
123350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
123450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
123550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src+=U16_LENGTH(c);
123650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
123750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
123850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
123950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * or has ccc!=0.
124050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
124150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isMaybeOrNonZeroCC(norm16)) {
124250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint8_t cc=getCCFromYesOrMaybe(norm16);
124350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( onlyContiguous &&  // FCC
124450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                cc!=0 &&
124550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC==0 &&
124650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary<prevSrc &&
124750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // prevCC==0 && prevBoundary<prevSrc tell us that
124850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
124950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // passed the quick check "yes && ccc==0" test.
125050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Check whether the last character was a "yesYes" or a "yesNo".
125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If a "yesNo", then we get its trailing ccc from its
125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // mapping and check for canonical order.
125350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // All other cases are ok.
125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
125650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fails FCD test.
125750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(prevCC<=cc || cc==0) {
125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC=cc;
125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(norm16<MIN_YES_YES_WITH_CC) {
126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(pQCResult!=NULL) {
126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *pQCResult=UNORM_MAYBE;
126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return prevBoundary;
126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(pQCResult!=NULL) {
127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pQCResult=UNORM_NO;
127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return prevBoundary;
127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
127650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,
127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UBool doCompose,
127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UBool onlyContiguous,
1279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                       UnicodeString &safeMiddle,
128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       ReorderingBuffer &buffer,
128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UErrorCode &errorCode) const {
128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!buffer.isEmpty()) {
128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *firstStarterInSrc=findNextCompBoundary(src, limit);
128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=firstStarterInSrc) {
128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                                    buffer.getLimit());
1287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
1288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UnicodeString middle(lastStarterInDest, destSuffixLength);
1289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            buffer.removeSuffix(destSuffixLength);
1290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            safeMiddle=middle;
129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            middle.append(src, (int32_t)(firstStarterInSrc-src));
129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *middleStart=middle.getBuffer();
129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            compose(middleStart, middleStart+middle.length(), onlyContiguous,
129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    TRUE, buffer, errorCode);
129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(U_FAILURE(errorCode)) {
129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return;
129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            src=firstStarterInSrc;
129950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(doCompose) {
130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
1304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
1305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            limit=u_strchr(src, 0);
1306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        buffer.appendZeroCC(src, limit, errorCode);
130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Does c have a composition boundary before it?
131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * True if its decomposition begins with a character that has
131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (isCompYesAndZeroCC()) so we need not decompose.
131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
131850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isCompYesAndZeroCC(norm16)) {
132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isMaybeOrNonZeroCC(norm16)) {
132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=getNorm16(c);
132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint16_t firstUnit=*mapping++;
133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD) && (*mapping++&0xff00)) {
133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;  // non-zero leadCC
133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t i=0;
133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar32 c;
133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT_UNSAFE(mapping, i, c);
134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return isCompYesAndZeroCC(getNorm16(c));
134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
134550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const {
134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t norm16=getNorm16(c);
134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isInert(norm16)) {
134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(norm16<=minYesNo) {
135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Hangul LVT (==minYesNo) has a boundary after it.
135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Hangul LV and non-inert yesYes characters combine forward.
135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return isHangul(norm16) && !Hangul::isHangulWithoutJamoT((UChar)c);
135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) {
135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data.
136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // If testInert, then c must be a yesNo character which has lccc=0,
136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // otherwise it could be a noNo.
136250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint16_t firstUnit=*mapping;
136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // TRUE if
136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //      c is not deleted, and
136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //      it and its decomposition do not combine forward, and it has a starter, and
136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //      if FCC then trailCC<=1
136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return
136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (firstUnit&MAPPING_LENGTH_MASK)!=0 &&
137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (firstUnit&(MAPPING_PLUS_COMPOSITION_LIST|MAPPING_NO_COMP_BOUNDARY_AFTER))==0 &&
137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (!onlyContiguous || firstUnit<=0x1ff);
137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
137650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p) const {
137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    BackwardUTrie2StringIterator iter(normTrie, start, p);
137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16;
137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm16=iter.previous16();
138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(!hasCompBoundaryBefore(iter.codePoint, norm16));
138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // but that's probably not worth the extra cost.
138450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return iter.codePointStart;
138550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
138650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
138750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit) const {
138850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ForwardUTrie2StringIterator iter(normTrie, p, limit);
138950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16;
139050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
139150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm16=iter.next16();
139250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(!hasCompBoundaryBefore(iter.codePoint, norm16));
139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return iter.codePointStart;
139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
139650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass FCDTrieSingleton : public UTrie2Singleton {
139750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    FCDTrieSingleton(SimpleSingleton &s, Normalizer2Impl &ni, UErrorCode &ec) :
139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTrie2Singleton(s), impl(ni), errorCode(ec) {}
140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UTrie2 *getInstance(UErrorCode &errorCode) {
140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return UTrie2Singleton::getInstance(createInstance, this, errorCode);
140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    static void *createInstance(const void *context, UErrorCode &errorCode);
140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(value!=0) {
140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            impl.setFCD16FromNorm16(start, end, (uint16_t)value, newFCDTrie, errorCode);
140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return U_SUCCESS(errorCode);
140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Normalizer2Impl &impl;
141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UTrie2 *newFCDTrie;
141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode &errorCode;
141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
141650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN
141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Set the FCD value for a range of same-norm16 characters.
141950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV
142050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return ((FCDTrieSingleton *)context)->rangeHandler(start, end, value);
142250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Collect (OR together) the FCD values for a range of supplementary characters,
142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// for their lead surrogate code unit.
142650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV
142727f654740f2a26ad62a5c155af9199af9e69b889clairehoenumRangeOrValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) {
142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    *((uint32_t *)context)|=value;
142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
143250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END
143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
143450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid *FCDTrieSingleton::createInstance(const void *context, UErrorCode &errorCode) {
143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    FCDTrieSingleton *me=(FCDTrieSingleton *)context;
143650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    me->newFCDTrie=utrie2_open(0, 0, &errorCode);
143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_SUCCESS(errorCode)) {
143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_enum(me->impl.getNormTrie(), NULL, enumRangeHandler, me);
143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(UChar lead=0xd800; lead<0xdc00; ++lead) {
144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint32_t oredValue=utrie2_get32(me->newFCDTrie, lead);
144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utrie2_enumForLeadSurrogate(me->newFCDTrie, lead, NULL, enumRangeOrValue, &oredValue);
144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(oredValue!=0) {
144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Set a "bad" value for makeFCD() to break the quick check loop
144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // and look up the value for the supplementary code point.
144550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If there is any lccc, then set the worst-case lccc of 1.
144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The ORed-together value's tccc is already the worst case.
144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(oredValue>0xff) {
144850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    oredValue=0x100|(oredValue&0xff);
144950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
145050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utrie2_set32ForLeadSurrogateCodeUnit(me->newFCDTrie, lead, oredValue, &errorCode);
145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_freeze(me->newFCDTrie, UTRIE2_16_VALUE_BITS, &errorCode);
145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(U_SUCCESS(errorCode)) {
145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return me->newFCDTrie;
145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_close(me->newFCDTrie);
145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return NULL;
146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
146250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                         UTrie2 *newFCDTrie, UErrorCode &errorCode) const {
146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Only loops for 1:1 algorithmic mappings.
146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(norm16>=MIN_NORMAL_MAYBE_YES) {
146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16&=0xff;
146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16|=norm16<<8;
146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(norm16<=minYesNo || minMaybeYes<=norm16) {
147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // no decomposition or Hangul syllable, all zeros
147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(limitNoNo<=norm16) {
147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t delta=norm16-(minMaybeYes-MAX_DELTA-1);
147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(start==end) {
147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                start+=delta;
147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                norm16=getNorm16(start);
147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // the same delta leads from different original characters to different mappings
147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                do {
148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c=start+delta;
148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    setFCD16FromNorm16(c, c, getNorm16(c), newFCDTrie, errorCode);
148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } while(++start<=end);
148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint16_t firstUnit=*mapping;
148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // A character that is deleted (maps to an empty string) must
149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // get the worst-case lccc and tccc values because arbitrary
149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // characters on both sides will become adjacent.
149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                norm16=0x1ff;
149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    norm16=mapping[1]&0xff00;  // lccc
149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    norm16=0;
149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                norm16|=firstUnit>>8;  // tccc
150150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
150250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_setRange32(newFCDTrie, start, end, norm16, TRUE, &errorCode);
150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        break;
150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
150850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UTrie2 *Normalizer2Impl::getFCDTrie(UErrorCode &errorCode) const {
150950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Logically const: Synchronized instantiation.
151050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this);
151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return FCDTrieSingleton(me->fcdTrieSingleton, *me, errorCode).getInstance(errorCode);
151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Dual functionality:
151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer!=NULL: normalize
151650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
151750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
151850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         ReorderingBuffer *buffer,
152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UErrorCode &errorCode) const {
152127f654740f2a26ad62a5c155af9199af9e69b889claireho    // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
152227f654740f2a26ad62a5c155af9199af9e69b889claireho    // Similar to the prevBoundary in the compose() implementation.
152327f654740f2a26ad62a5c155af9199af9e69b889claireho    const UChar *prevBoundary=src;
152427f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t prevFCD16=0;
152550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(limit==NULL) {
152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src=copyLowPrefixFromNulTerminated(src, MIN_CCC_LCCC_CP, buffer, errorCode);
152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(U_FAILURE(errorCode)) {
152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return src;
152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
153027f654740f2a26ad62a5c155af9199af9e69b889claireho        if(prevBoundary<src) {
153127f654740f2a26ad62a5c155af9199af9e69b889claireho            prevBoundary=src;
153227f654740f2a26ad62a5c155af9199af9e69b889claireho            // We know that the previous character's lccc==0.
153327f654740f2a26ad62a5c155af9199af9e69b889claireho            // Fetching the fcd16 value was deferred for this below-U+0300 code point.
153427f654740f2a26ad62a5c155af9199af9e69b889claireho            prevFCD16=getFCD16FromSingleLead(*(src-1));
153527f654740f2a26ad62a5c155af9199af9e69b889claireho            if(prevFCD16>1) {
153627f654740f2a26ad62a5c155af9199af9e69b889claireho                --prevBoundary;
153727f654740f2a26ad62a5c155af9199af9e69b889claireho            }
153827f654740f2a26ad62a5c155af9199af9e69b889claireho        }
153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit=u_strchr(src, 0);
154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Note: In this function we use buffer->appendZeroCC() because we track
154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // the lead and trail combining classes here, rather than leaving it to
154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // the ReorderingBuffer.
154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // The exception is the call to decomposeShort() which uses the buffer
154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // in the normal way.
154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UTrie2 *trie=fcdTrie();
154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc;
155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=0;
155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t fcd16=0;
155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // count code units with lccc==0
155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(prevSrc=src; src!=limit;) {
155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((c=*src)<MIN_CCC_LCCC_CP) {
155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevFCD16=~c;
155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if((fcd16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c))<=0xff) {
156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevFCD16=fcd16;
156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(!U16_IS_SURROGATE(c)) {
156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c2;
156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_SURROGATE_LEAD(c)) {
156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c, c2);
157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else /* trail surrogate */ {
157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --src;
157450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c2, c);
157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
157750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if((fcd16=getFCD16(c))<=0xff) {
157850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevFCD16=fcd16;
157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    src+=U16_LENGTH(c);
158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
158150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // copy these code units all at once
158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=prevSrc) {
158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) {
158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(src==limit) {
159150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
159250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=src;
159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // We know that the previous character's lccc==0.
159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(prevFCD16<0) {
159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetching the fcd16 value was deferred for this below-U+0300 code point.
159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevFCD16=getFCD16FromSingleLead((UChar)~prevFCD16);
159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(prevFCD16>1) {
159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    --prevBoundary;
160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
160150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const UChar *p=src-1;
160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) {
160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    --p;
160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Need to fetch the previous character's FCD value because
160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // prevFCD16 was just for the trail surrogate code point.
160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevFCD16=getFCD16FromSurrogatePair(p[0], p[1]);
160850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
160950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(prevFCD16>1) {
161150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevBoundary=p;
161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // The start of the current character (c).
161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevSrc=src;
161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(src==limit) {
161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
161950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src+=U16_LENGTH(c);
162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Check for proper order, and decompose locally if necessary.
162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if((prevFCD16&0xff)<=(fcd16>>8)) {
162450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // proper order: prev tccc <= current lccc
162550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((fcd16&0xff)<=1) {
162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary=src;
162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) {
162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevFCD16=fcd16;
163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(buffer==NULL) {
163450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return prevBoundary;  // quick check "no"
163550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Back out the part of the source that we copied or appended
163850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * already but is now going to be decomposed.
163950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * prevSrc is set to after what was copied/appended.
164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Find the part of the source that needs to be decomposed,
164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * up to the next safe boundary.
164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            src=findNextFCDBoundary(src, limit);
164750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * The source text does not fulfill the conditions for FCD.
164950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Decompose and reorder a limited piece of the text.
165050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
165150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!decomposeShort(prevBoundary, src, *buffer, errorCode)) {
165250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
165350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
165450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=src;
165550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevFCD16=0;
165650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
165750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
165850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return src;
165950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
166050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
166150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit,
166250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UBool doMakeFCD,
1663b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                       UnicodeString &safeMiddle,
166450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       ReorderingBuffer &buffer,
166550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UErrorCode &errorCode) const {
166650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!buffer.isEmpty()) {
166750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
166850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=firstBoundaryInSrc) {
166950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
167050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                                    buffer.getLimit());
1671b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
1672b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UnicodeString middle(lastBoundaryInDest, destSuffixLength);
1673b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            buffer.removeSuffix(destSuffixLength);
1674b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            safeMiddle=middle;
167550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            middle.append(src, (int32_t)(firstBoundaryInSrc-src));
167650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *middleStart=middle.getBuffer();
167750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
167850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(U_FAILURE(errorCode)) {
167950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return;
168050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
168150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            src=firstBoundaryInSrc;
168250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
168350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
168450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(doMakeFCD) {
168550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        makeFCD(src, limit, &buffer, errorCode);
168650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
1687b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
1688b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            limit=u_strchr(src, 0);
1689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
169050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        buffer.appendZeroCC(src, limit, errorCode);
169150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
169250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
169350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
169450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const {
169550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    BackwardUTrie2StringIterator iter(fcdTrie(), start, p);
169650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t fcd16;
169750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fcd16=iter.previous16();
169950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(fcd16>0xff);
170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return iter.codePointStart;
170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
170350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const {
170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ForwardUTrie2StringIterator iter(fcdTrie(), p, limit);
170550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t fcd16;
170650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
170750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fcd16=iter.next16();
170850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(fcd16>0xff);
170950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return iter.codePointStart;
171050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
171150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
171227f654740f2a26ad62a5c155af9199af9e69b889claireho// CanonicalIterator data -------------------------------------------------- ***
171327f654740f2a26ad62a5c155af9199af9e69b889claireho
171427f654740f2a26ad62a5c155af9199af9e69b889clairehoCanonIterData::CanonIterData(UErrorCode &errorCode) :
171527f654740f2a26ad62a5c155af9199af9e69b889claireho        trie(utrie2_open(0, 0, &errorCode)),
171627f654740f2a26ad62a5c155af9199af9e69b889claireho        canonStartSets(uhash_deleteUObject, NULL, errorCode) {}
171727f654740f2a26ad62a5c155af9199af9e69b889claireho
171827f654740f2a26ad62a5c155af9199af9e69b889clairehoCanonIterData::~CanonIterData() {
171927f654740f2a26ad62a5c155af9199af9e69b889claireho    utrie2_close(trie);
172027f654740f2a26ad62a5c155af9199af9e69b889claireho}
172127f654740f2a26ad62a5c155af9199af9e69b889claireho
172227f654740f2a26ad62a5c155af9199af9e69b889clairehovoid CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
172327f654740f2a26ad62a5c155af9199af9e69b889claireho    uint32_t canonValue=utrie2_get32(trie, decompLead);
172427f654740f2a26ad62a5c155af9199af9e69b889claireho    if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
172527f654740f2a26ad62a5c155af9199af9e69b889claireho        // origin is the first character whose decomposition starts with
172627f654740f2a26ad62a5c155af9199af9e69b889claireho        // the character for which we are setting the value.
172727f654740f2a26ad62a5c155af9199af9e69b889claireho        utrie2_set32(trie, decompLead, canonValue|origin, &errorCode);
172827f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
172927f654740f2a26ad62a5c155af9199af9e69b889claireho        // origin is not the first character, or it is U+0000.
173027f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeSet *set;
173127f654740f2a26ad62a5c155af9199af9e69b889claireho        if((canonValue&CANON_HAS_SET)==0) {
173227f654740f2a26ad62a5c155af9199af9e69b889claireho            set=new UnicodeSet;
173327f654740f2a26ad62a5c155af9199af9e69b889claireho            if(set==NULL) {
173427f654740f2a26ad62a5c155af9199af9e69b889claireho                errorCode=U_MEMORY_ALLOCATION_ERROR;
173527f654740f2a26ad62a5c155af9199af9e69b889claireho                return;
173627f654740f2a26ad62a5c155af9199af9e69b889claireho            }
173727f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
173827f654740f2a26ad62a5c155af9199af9e69b889claireho            canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
173927f654740f2a26ad62a5c155af9199af9e69b889claireho            utrie2_set32(trie, decompLead, canonValue, &errorCode);
174027f654740f2a26ad62a5c155af9199af9e69b889claireho            canonStartSets.addElement(set, errorCode);
174127f654740f2a26ad62a5c155af9199af9e69b889claireho            if(firstOrigin!=0) {
174227f654740f2a26ad62a5c155af9199af9e69b889claireho                set->add(firstOrigin);
174327f654740f2a26ad62a5c155af9199af9e69b889claireho            }
174427f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
174527f654740f2a26ad62a5c155af9199af9e69b889claireho            set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)];
174627f654740f2a26ad62a5c155af9199af9e69b889claireho        }
174727f654740f2a26ad62a5c155af9199af9e69b889claireho        set->add(origin);
174827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
174927f654740f2a26ad62a5c155af9199af9e69b889claireho}
175027f654740f2a26ad62a5c155af9199af9e69b889claireho
175127f654740f2a26ad62a5c155af9199af9e69b889clairehoclass CanonIterDataSingleton {
175227f654740f2a26ad62a5c155af9199af9e69b889clairehopublic:
175327f654740f2a26ad62a5c155af9199af9e69b889claireho    CanonIterDataSingleton(SimpleSingleton &s, Normalizer2Impl &ni, UErrorCode &ec) :
175427f654740f2a26ad62a5c155af9199af9e69b889claireho        singleton(s), impl(ni), errorCode(ec) {}
175527f654740f2a26ad62a5c155af9199af9e69b889claireho    CanonIterData *getInstance(UErrorCode &errorCode) {
175627f654740f2a26ad62a5c155af9199af9e69b889claireho        void *duplicate;
175727f654740f2a26ad62a5c155af9199af9e69b889claireho        CanonIterData *instance=
175827f654740f2a26ad62a5c155af9199af9e69b889claireho            (CanonIterData *)singleton.getInstance(createInstance, this, duplicate, errorCode);
175927f654740f2a26ad62a5c155af9199af9e69b889claireho        delete (CanonIterData *)duplicate;
176027f654740f2a26ad62a5c155af9199af9e69b889claireho        return instance;
176127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
176227f654740f2a26ad62a5c155af9199af9e69b889claireho    static void *createInstance(const void *context, UErrorCode &errorCode);
176327f654740f2a26ad62a5c155af9199af9e69b889claireho    UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
176427f654740f2a26ad62a5c155af9199af9e69b889claireho        if(value!=0) {
176527f654740f2a26ad62a5c155af9199af9e69b889claireho            impl.makeCanonIterDataFromNorm16(start, end, (uint16_t)value, *newData, errorCode);
176627f654740f2a26ad62a5c155af9199af9e69b889claireho        }
176727f654740f2a26ad62a5c155af9199af9e69b889claireho        return U_SUCCESS(errorCode);
176827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
176927f654740f2a26ad62a5c155af9199af9e69b889claireho
177027f654740f2a26ad62a5c155af9199af9e69b889clairehoprivate:
177127f654740f2a26ad62a5c155af9199af9e69b889claireho    SimpleSingleton &singleton;
177227f654740f2a26ad62a5c155af9199af9e69b889claireho    Normalizer2Impl &impl;
177327f654740f2a26ad62a5c155af9199af9e69b889claireho    CanonIterData *newData;
177427f654740f2a26ad62a5c155af9199af9e69b889claireho    UErrorCode &errorCode;
177527f654740f2a26ad62a5c155af9199af9e69b889claireho};
177627f654740f2a26ad62a5c155af9199af9e69b889claireho
177727f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_BEGIN
177827f654740f2a26ad62a5c155af9199af9e69b889claireho
177927f654740f2a26ad62a5c155af9199af9e69b889claireho// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
178027f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool U_CALLCONV
178127f654740f2a26ad62a5c155af9199af9e69b889clairehoenumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
178227f654740f2a26ad62a5c155af9199af9e69b889claireho    return ((CanonIterDataSingleton *)context)->rangeHandler(start, end, value);
178327f654740f2a26ad62a5c155af9199af9e69b889claireho}
178427f654740f2a26ad62a5c155af9199af9e69b889claireho
178527f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_END
178627f654740f2a26ad62a5c155af9199af9e69b889claireho
178727f654740f2a26ad62a5c155af9199af9e69b889clairehovoid *CanonIterDataSingleton::createInstance(const void *context, UErrorCode &errorCode) {
178827f654740f2a26ad62a5c155af9199af9e69b889claireho    CanonIterDataSingleton *me=(CanonIterDataSingleton *)context;
178927f654740f2a26ad62a5c155af9199af9e69b889claireho    me->newData=new CanonIterData(errorCode);
179027f654740f2a26ad62a5c155af9199af9e69b889claireho    if(me->newData==NULL) {
179127f654740f2a26ad62a5c155af9199af9e69b889claireho        errorCode=U_MEMORY_ALLOCATION_ERROR;
179227f654740f2a26ad62a5c155af9199af9e69b889claireho        return NULL;
179327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
179427f654740f2a26ad62a5c155af9199af9e69b889claireho    if(U_SUCCESS(errorCode)) {
179527f654740f2a26ad62a5c155af9199af9e69b889claireho        utrie2_enum(me->impl.getNormTrie(), NULL, enumCIDRangeHandler, me);
179627f654740f2a26ad62a5c155af9199af9e69b889claireho        utrie2_freeze(me->newData->trie, UTRIE2_32_VALUE_BITS, &errorCode);
179727f654740f2a26ad62a5c155af9199af9e69b889claireho        if(U_SUCCESS(errorCode)) {
179827f654740f2a26ad62a5c155af9199af9e69b889claireho            return me->newData;
179927f654740f2a26ad62a5c155af9199af9e69b889claireho        }
180027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
180127f654740f2a26ad62a5c155af9199af9e69b889claireho    delete me->newData;
180227f654740f2a26ad62a5c155af9199af9e69b889claireho    return NULL;
180327f654740f2a26ad62a5c155af9199af9e69b889claireho}
180427f654740f2a26ad62a5c155af9199af9e69b889claireho
180527f654740f2a26ad62a5c155af9199af9e69b889clairehovoid Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
180627f654740f2a26ad62a5c155af9199af9e69b889claireho                                                  CanonIterData &newData,
180727f654740f2a26ad62a5c155af9199af9e69b889claireho                                                  UErrorCode &errorCode) const {
180827f654740f2a26ad62a5c155af9199af9e69b889claireho    if(norm16==0 || (minYesNo<=norm16 && norm16<minNoNo)) {
180927f654740f2a26ad62a5c155af9199af9e69b889claireho        // Inert, or 2-way mapping (including Hangul syllable).
181027f654740f2a26ad62a5c155af9199af9e69b889claireho        // We do not write a canonStartSet for any yesNo character.
181127f654740f2a26ad62a5c155af9199af9e69b889claireho        // Composites from 2-way mappings are added at runtime from the
181227f654740f2a26ad62a5c155af9199af9e69b889claireho        // starter's compositions list, and the other characters in
181327f654740f2a26ad62a5c155af9199af9e69b889claireho        // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
181427f654740f2a26ad62a5c155af9199af9e69b889claireho        // "maybe" characters.
181527f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
181627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
181727f654740f2a26ad62a5c155af9199af9e69b889claireho    for(UChar32 c=start; c<=end; ++c) {
181827f654740f2a26ad62a5c155af9199af9e69b889claireho        uint32_t oldValue=utrie2_get32(newData.trie, c);
181927f654740f2a26ad62a5c155af9199af9e69b889claireho        uint32_t newValue=oldValue;
182027f654740f2a26ad62a5c155af9199af9e69b889claireho        if(norm16>=minMaybeYes) {
182127f654740f2a26ad62a5c155af9199af9e69b889claireho            // not a segment starter if it occurs in a decomposition or has cc!=0
182227f654740f2a26ad62a5c155af9199af9e69b889claireho            newValue|=CANON_NOT_SEGMENT_STARTER;
182327f654740f2a26ad62a5c155af9199af9e69b889claireho            if(norm16<MIN_NORMAL_MAYBE_YES) {
182427f654740f2a26ad62a5c155af9199af9e69b889claireho                newValue|=CANON_HAS_COMPOSITIONS;
182527f654740f2a26ad62a5c155af9199af9e69b889claireho            }
182627f654740f2a26ad62a5c155af9199af9e69b889claireho        } else if(norm16<minYesNo) {
182727f654740f2a26ad62a5c155af9199af9e69b889claireho            newValue|=CANON_HAS_COMPOSITIONS;
182827f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
182927f654740f2a26ad62a5c155af9199af9e69b889claireho            // c has a one-way decomposition
183027f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar32 c2=c;
183127f654740f2a26ad62a5c155af9199af9e69b889claireho            uint16_t norm16_2=norm16;
183227f654740f2a26ad62a5c155af9199af9e69b889claireho            while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) {
183327f654740f2a26ad62a5c155af9199af9e69b889claireho                c2=mapAlgorithmic(c2, norm16_2);
183427f654740f2a26ad62a5c155af9199af9e69b889claireho                norm16_2=getNorm16(c2);
183527f654740f2a26ad62a5c155af9199af9e69b889claireho            }
183627f654740f2a26ad62a5c155af9199af9e69b889claireho            if(minYesNo<=norm16_2 && norm16_2<limitNoNo) {
183727f654740f2a26ad62a5c155af9199af9e69b889claireho                // c decomposes, get everything from the variable-length extra data
183827f654740f2a26ad62a5c155af9199af9e69b889claireho                const uint16_t *mapping=getMapping(norm16_2);
183927f654740f2a26ad62a5c155af9199af9e69b889claireho                uint16_t firstUnit=*mapping++;
184027f654740f2a26ad62a5c155af9199af9e69b889claireho                int32_t length=firstUnit&MAPPING_LENGTH_MASK;
184127f654740f2a26ad62a5c155af9199af9e69b889claireho                if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
184227f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(c==c2 && (*mapping&0xff)!=0) {
184327f654740f2a26ad62a5c155af9199af9e69b889claireho                        newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
184427f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
184527f654740f2a26ad62a5c155af9199af9e69b889claireho                    ++mapping;
184627f654740f2a26ad62a5c155af9199af9e69b889claireho                }
184727f654740f2a26ad62a5c155af9199af9e69b889claireho                // Skip empty mappings (no characters in the decomposition).
184827f654740f2a26ad62a5c155af9199af9e69b889claireho                if(length!=0) {
184927f654740f2a26ad62a5c155af9199af9e69b889claireho                    // add c to first code point's start set
185027f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t i=0;
185127f654740f2a26ad62a5c155af9199af9e69b889claireho                    U16_NEXT_UNSAFE(mapping, i, c2);
185227f654740f2a26ad62a5c155af9199af9e69b889claireho                    newData.addToStartSet(c, c2, errorCode);
185327f654740f2a26ad62a5c155af9199af9e69b889claireho                    // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
185427f654740f2a26ad62a5c155af9199af9e69b889claireho                    // one-way mapping. A 2-way mapping is possible here after
185527f654740f2a26ad62a5c155af9199af9e69b889claireho                    // intermediate algorithmic mapping.
185627f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(norm16_2>=minNoNo) {
185727f654740f2a26ad62a5c155af9199af9e69b889claireho                        while(i<length) {
185827f654740f2a26ad62a5c155af9199af9e69b889claireho                            U16_NEXT_UNSAFE(mapping, i, c2);
185927f654740f2a26ad62a5c155af9199af9e69b889claireho                            uint32_t c2Value=utrie2_get32(newData.trie, c2);
186027f654740f2a26ad62a5c155af9199af9e69b889claireho                            if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
186127f654740f2a26ad62a5c155af9199af9e69b889claireho                                utrie2_set32(newData.trie, c2, c2Value|CANON_NOT_SEGMENT_STARTER,
186227f654740f2a26ad62a5c155af9199af9e69b889claireho                                             &errorCode);
186327f654740f2a26ad62a5c155af9199af9e69b889claireho                            }
186427f654740f2a26ad62a5c155af9199af9e69b889claireho                        }
186527f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
186627f654740f2a26ad62a5c155af9199af9e69b889claireho                }
186727f654740f2a26ad62a5c155af9199af9e69b889claireho            } else {
186827f654740f2a26ad62a5c155af9199af9e69b889claireho                // c decomposed to c2 algorithmically; c has cc==0
186927f654740f2a26ad62a5c155af9199af9e69b889claireho                newData.addToStartSet(c, c2, errorCode);
187027f654740f2a26ad62a5c155af9199af9e69b889claireho            }
187127f654740f2a26ad62a5c155af9199af9e69b889claireho        }
187227f654740f2a26ad62a5c155af9199af9e69b889claireho        if(newValue!=oldValue) {
187327f654740f2a26ad62a5c155af9199af9e69b889claireho            utrie2_set32(newData.trie, c, newValue, &errorCode);
187427f654740f2a26ad62a5c155af9199af9e69b889claireho        }
187527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
187627f654740f2a26ad62a5c155af9199af9e69b889claireho}
187727f654740f2a26ad62a5c155af9199af9e69b889claireho
187827f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
187927f654740f2a26ad62a5c155af9199af9e69b889claireho    // Logically const: Synchronized instantiation.
188027f654740f2a26ad62a5c155af9199af9e69b889claireho    Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this);
188127f654740f2a26ad62a5c155af9199af9e69b889claireho    CanonIterDataSingleton(me->canonIterDataSingleton, *me, errorCode).getInstance(errorCode);
188227f654740f2a26ad62a5c155af9199af9e69b889claireho    return U_SUCCESS(errorCode);
188327f654740f2a26ad62a5c155af9199af9e69b889claireho}
188427f654740f2a26ad62a5c155af9199af9e69b889claireho
188527f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
188627f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)utrie2_get32(((CanonIterData *)canonIterDataSingleton.fInstance)->trie, c);
188727f654740f2a26ad62a5c155af9199af9e69b889claireho}
188827f654740f2a26ad62a5c155af9199af9e69b889claireho
188927f654740f2a26ad62a5c155af9199af9e69b889clairehoconst UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
189027f654740f2a26ad62a5c155af9199af9e69b889claireho    return *(const UnicodeSet *)(
189127f654740f2a26ad62a5c155af9199af9e69b889claireho        ((CanonIterData *)canonIterDataSingleton.fInstance)->canonStartSets[n]);
189227f654740f2a26ad62a5c155af9199af9e69b889claireho}
189327f654740f2a26ad62a5c155af9199af9e69b889claireho
189427f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const {
189527f654740f2a26ad62a5c155af9199af9e69b889claireho    return getCanonValue(c)>=0;
189627f654740f2a26ad62a5c155af9199af9e69b889claireho}
189727f654740f2a26ad62a5c155af9199af9e69b889claireho
189827f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
189927f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER;
190027f654740f2a26ad62a5c155af9199af9e69b889claireho    if(canonValue==0) {
190127f654740f2a26ad62a5c155af9199af9e69b889claireho        return FALSE;
190227f654740f2a26ad62a5c155af9199af9e69b889claireho    }
190327f654740f2a26ad62a5c155af9199af9e69b889claireho    set.clear();
190427f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t value=canonValue&CANON_VALUE_MASK;
190527f654740f2a26ad62a5c155af9199af9e69b889claireho    if((canonValue&CANON_HAS_SET)!=0) {
190627f654740f2a26ad62a5c155af9199af9e69b889claireho        set.addAll(getCanonStartSet(value));
190727f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if(value!=0) {
190827f654740f2a26ad62a5c155af9199af9e69b889claireho        set.add(value);
190927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
191027f654740f2a26ad62a5c155af9199af9e69b889claireho    if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
191127f654740f2a26ad62a5c155af9199af9e69b889claireho        uint16_t norm16=getNorm16(c);
191227f654740f2a26ad62a5c155af9199af9e69b889claireho        if(norm16==JAMO_L) {
191327f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar32 syllable=
191427f654740f2a26ad62a5c155af9199af9e69b889claireho                (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
191527f654740f2a26ad62a5c155af9199af9e69b889claireho            set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);
191627f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
191727f654740f2a26ad62a5c155af9199af9e69b889claireho            addComposites(getCompositionsList(norm16), set);
191827f654740f2a26ad62a5c155af9199af9e69b889claireho        }
191927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
192027f654740f2a26ad62a5c155af9199af9e69b889claireho    return TRUE;
192127f654740f2a26ad62a5c155af9199af9e69b889claireho}
192227f654740f2a26ad62a5c155af9199af9e69b889claireho
192350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END
192450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
192550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Normalizer2 data swapping ----------------------------------------------- ***
192650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
192750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_USE
192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
192950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI int32_t U_EXPORT2
193050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_swap(const UDataSwapper *ds,
193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const void *inData, int32_t length, void *outData,
193250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode *pErrorCode) {
193350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UDataInfo *pInfo;
193450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t headerSize;
193550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint8_t *inBytes;
193750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t *outBytes;
193850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
193950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const int32_t *inIndexes;
194050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t indexes[Normalizer2Impl::IX_MIN_MAYBE_YES+1];
194150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i, offset, nextOffset, size;
194350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* udata_swapDataHeader checks the arguments */
194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
194950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* check data format and format version */
195150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pInfo=(const UDataInfo *)((const char *)inData+4);
195250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!(
195350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Nrm2" */
195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[1]==0x72 &&
195550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[2]==0x6d &&
195650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[3]==0x32 &&
195750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->formatVersion[0]==1
195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    )) {
195950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
196050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         pInfo->dataFormat[0], pInfo->dataFormat[1],
196150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         pInfo->dataFormat[2], pInfo->dataFormat[3],
196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         pInfo->formatVersion[0]);
196350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pErrorCode=U_UNSUPPORTED_ERROR;
196450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
196550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
196650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
196750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    inBytes=(const uint8_t *)inData+headerSize;
196850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    outBytes=(uint8_t *)outData+headerSize;
196950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
197050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    inIndexes=(const int32_t *)inBytes;
197150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
197250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length>=0) {
197350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length-=headerSize;
197427f654740f2a26ad62a5c155af9199af9e69b889claireho        if(length<(int32_t)sizeof(indexes)) {
197550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
197650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             length);
197750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
197850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return 0;
197950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
198050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
198150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
198250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* read the first few indexes */
198350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(i=0; i<=Normalizer2Impl::IX_MIN_MAYBE_YES; ++i) {
198450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        indexes[i]=udata_readInt32(ds, inIndexes[i]);
198550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
198650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
198750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* get the total length of the data */
198850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    size=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
198950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
199050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length>=0) {
199150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(length<size) {
199250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n",
199350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             length);
199450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
199550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return 0;
199650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
199750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
199850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* copy the data for inaccessible bytes */
199950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(inBytes!=outBytes) {
200050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uprv_memcpy(outBytes, inBytes, size);
200150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
200250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
200350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        offset=0;
200450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
200550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* swap the int32_t indexes[] */
200650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET];
200750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
200850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        offset=nextOffset;
200950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
201050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* swap the UTrie2 */
201150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
201250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
201350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        offset=nextOffset;
201450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
201550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* swap the uint16_t extraData[] */
201650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET+1];
201750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
201850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        offset=nextOffset;
201950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
202050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(offset==size);
202150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
202250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
202350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return headerSize+size;
202450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
202550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
202650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif  // !UCONFIG_NO_NORMALIZATION
2027