/*
*******************************************************************************
*
*   Copyright (C) 2009-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2impl.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h"
1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/normalizer2.h"
2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/udata.h"
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h"
24103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "cmemory.h"
2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "mutex.h"
2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h"
28103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "putilimp.h"
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uassert.h"
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uset_imp.h"
3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "utrie2.h"
3227f654740f2a26ad62a5c155af9199af9e69b889claireho#include "uvector.h"
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
// ReorderingBuffer -------------------------------------------------------- ***
3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) {
3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=str.length();
4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    start=str.getBuffer(destCapacity);
4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(start==NULL) {
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // getBuffer() already did str.setToBogus()
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode=U_MEMORY_ALLOCATION_ERROR;
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    limit=start+length;
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity=str.getCapacity()-length;
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=start;
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(start==limit) {
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lastCC=0;
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        setIterator();
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lastCC=previousCC();
5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Set reorderStart after the last code point with cc<=1 if there is one.
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(lastCC>1) {
5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            while(previousCC()>1) {}
5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reorderStart=codePointLimit;
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const {
6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=(int32_t)(limit-start);
6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return
6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length==(int32_t)(otherLimit-otherStart) &&
6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        0==u_memcmp(start, otherStart, length);
6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
7050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(remainingCapacity<2 && !resize(2, errorCode)) {
7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(lastCC<=cc || cc==0) {
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit[0]=U16_LEAD(c);
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit[1]=U16_TRAIL(c);
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit+=2;
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lastCC=cc;
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(cc<=1) {
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reorderStart=limit;
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        insert(c, cc);
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity-=2;
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::append(const UChar *s, int32_t length,
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                               uint8_t leadCC, uint8_t trailCC,
9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                               UErrorCode &errorCode) {
9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length==0) {
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(remainingCapacity<length && !resize(length, errorCode)) {
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity-=length;
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(lastCC<=leadCC || leadCC==0) {
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(trailCC<=1) {
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reorderStart=limit+length;
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(leadCC<=1) {
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reorderStart=limit+1;  // Ok if not a code point boundary.
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *sLimit=s+length;
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        do { *limit++=*s++; } while(s!=sLimit);
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        lastCC=trailCC;
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t i=0;
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U16_NEXT(s, i, length, c);
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        insert(c, leadCC);  // insert first code point
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(i<length) {
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(s, i, length, c);
11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(i<length) {
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // s must be in NFD, otherwise we need to use getCC().
11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                leadCC=trailCC;
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            append(c, leadCC, errorCode);
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) {
12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t cpLength=U16_LENGTH(c);
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) {
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity-=cpLength;
13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(cpLength==1) {
13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *limit++=(UChar)c;
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit[0]=U16_LEAD(c);
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit[1]=U16_TRAIL(c);
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit+=2;
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    lastCC=0;
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=limit;
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode) {
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(s==sLimit) {
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=(int32_t)(sLimit-s);
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(remainingCapacity<length && !resize(length, errorCode)) {
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_memcpy(limit, s, length);
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    limit+=length;
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity-=length;
15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    lastCC=0;
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=limit;
15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
16150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::remove() {
16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=limit=start;
16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity=str.getCapacity();
16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    lastCC=0;
16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::removeSuffix(int32_t suffixLength) {
16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(suffixLength<(limit-start)) {
16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit-=suffixLength;
17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        remainingCapacity+=suffixLength;
17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit=start;
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        remainingCapacity=str.getCapacity();
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    lastCC=0;
17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=limit;
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
17950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) {
18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t reorderStartIndex=(int32_t)(reorderStart-start);
18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t length=(int32_t)(limit-start);
18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    str.releaseBuffer(length);
18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t newCapacity=length+appendLength;
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t doubleCapacity=2*str.getCapacity();
18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(newCapacity<doubleCapacity) {
18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        newCapacity=doubleCapacity;
18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(newCapacity<256) {
18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        newCapacity=256;
19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    start=str.getBuffer(newCapacity);
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(start==NULL) {
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // getBuffer() already did str.setToBogus()
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode=U_MEMORY_ALLOCATION_ERROR;
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reorderStart=start+reorderStartIndex;
19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    limit=start+length;
19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    remainingCapacity=str.getCapacity()-length;
20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
20350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::skipPrevious() {
20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    codePointLimit=codePointStart;
20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar c=*--codePointStart;
20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) {
20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        --codePointStart;
20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
21150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouint8_t ReorderingBuffer::previousCC() {
21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    codePointLimit=codePointStart;
21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(reorderStart>=codePointStart) {
21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=*--codePointStart;
21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(c<Normalizer2Impl::MIN_CCC_LCCC_CP) {
21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar c2;
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) {
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        --codePointStart;
22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c=U16_GET_SUPPLEMENTARY(c2, c);
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Inserts c somewhere before the last character.
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Requires 0<cc<lastCC which implies reorderStart<limit.
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid ReorderingBuffer::insert(UChar32 c, uint8_t cc) {
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(setIterator(), skipPrevious(); previousCC()>cc;) {}
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // insert c at codePointLimit, after the character with prevCC<=cc
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *q=limit;
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *r=limit+=U16_LENGTH(c);
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *--r=*--q;
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(codePointLimit!=q);
23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    writeCodePoint(q, c);
24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(cc<=1) {
24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reorderStart=r;
24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
// Normalizer2Impl --------------------------------------------------------- ***
24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
24727f654740f2a26ad62a5c155af9199af9e69b889clairehostruct CanonIterData : public UMemory {
24827f654740f2a26ad62a5c155af9199af9e69b889claireho    CanonIterData(UErrorCode &errorCode);
24927f654740f2a26ad62a5c155af9199af9e69b889claireho    ~CanonIterData();
25027f654740f2a26ad62a5c155af9199af9e69b889claireho    void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
25127f654740f2a26ad62a5c155af9199af9e69b889claireho    UTrie2 *trie;
25227f654740f2a26ad62a5c155af9199af9e69b889claireho    UVector canonStartSets;  // contains UnicodeSet *
25327f654740f2a26ad62a5c155af9199af9e69b889claireho};
25427f654740f2a26ad62a5c155af9199af9e69b889claireho
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::~Normalizer2Impl() {
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    udata_close(memory);
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utrie2_close(normTrie);
25859d709d503bab6e2b61931737e662dd293b40578ccornelius    delete fCanonIterData;
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool U_CALLCONV
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::isAcceptable(void *context,
26327f654740f2a26ad62a5c155af9199af9e69b889claireho                              const char * /* type */, const char * /*name*/,
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                              const UDataInfo *pInfo) {
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->size>=20 &&
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->charsetFamily==U_CHARSET_FAMILY &&
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[1]==0x72 &&
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[2]==0x6d &&
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[3]==0x32 &&
273103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        pInfo->formatVersion[0]==2
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        Normalizer2Impl *me=(Normalizer2Impl *)context;
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return TRUE;
27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(errorCode)) {
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(errorCode)) {
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const int32_t *inIndexes=(const int32_t *)inBytes;
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(indexesLength<=IX_MIN_MAYBE_YES) {
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minYesNo=inIndexes[IX_MIN_YES_NO];
304103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minNoNo=inIndexes[IX_MIN_NO_NO];
30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    limitNoNo=inIndexes[IX_LIMIT_NO_NO];
30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       inBytes+offset, nextOffset-offset, NULL,
31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       &errorCode);
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(errorCode)) {
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    offset=nextOffset;
319103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    maybeYesCompositions=(const uint16_t *)(inBytes+offset);
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);
322103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
323103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // smallFCD: new in formatVersion 2
324103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    offset=nextOffset;
325103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    smallFCD=inBytes+offset;
326103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
327103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // Build tccc180[].
328103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
329103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint8_t bits=0;
330103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    for(UChar c=0; c<0x180; bits>>=1) {
331103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if((c&0xff)==0) {
332103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            bits=smallFCD[c>>8];  // one byte per 0x100 code points
333103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
334103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(bits&1) {
335103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            for(int i=0; i<0x20; ++i, ++c) {
336103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                tccc180[c]=(uint8_t)getFCD16FromNormData(c);
337103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
338103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
339103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            uprv_memset(tccc180+c, 0, 0x20);
340103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            c+=0x20;
341103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
342103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouint8_t Normalizer2Impl::getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const {
34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c;
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(cpStart==(cpLimit-1)) {
34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c=*cpStart;
34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]);
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t prevNorm16=getNorm16(c);
35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(prevNorm16<=minYesNo) {
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;  // yesYes and Hangul LV/LVT have ccc=tccc=0
35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return (uint8_t)(*getMapping(prevNorm16)>>8);  // tccc from yesNo
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass LcccContext {
363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {}
365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void handleRange(UChar32 start, UChar32 end, uint16_t norm16) {
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(impl.isAlgorithmicNoNo(norm16)) {
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Range of code points with same-norm16-value algorithmic decompositions.
369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // They might have different non-zero FCD16 values.
370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            do {
371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                uint16_t fcd16=impl.getFCD16(start);
372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(fcd16>0xff) { set.add(start); }
373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } while(++start<=end);
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint16_t fcd16=impl.getFCD16(start);
376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(fcd16>0xff) { set.add(start, end); }
377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const Normalizer2Impl &impl;
382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet &set;
383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstruct PropertyStartsContext {
386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder)
387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : impl(ni), sa(adder) {}
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const Normalizer2Impl &impl;
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const USetAdder *sa;
391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}  // namespace
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
39550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN
39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
39750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusenumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ((LcccContext *)context)->handleRange(start, end, (uint16_t)value);
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UBool U_CALLCONV
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusenumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /* add the start code point to the USet */
406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const PropertyStartsContext *ctx=(const PropertyStartsContext *)context;
407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const USetAdder *sa=ctx->sa;
408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    sa->add(sa->set, start);
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(start!=end && ctx->impl.isAlgorithmicNoNo((uint16_t)value)) {
410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Range of code points with same-norm16-value algorithmic decompositions.
411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // They might have different non-zero FCD16 values.
412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint16_t prevFCD16=ctx->impl.getFCD16(start);
413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        while(++start<=end) {
414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint16_t fcd16=ctx->impl.getFCD16(start);
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(fcd16!=prevFCD16) {
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sa->add(sa->set, start);
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                prevFCD16=fcd16;
418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UBool U_CALLCONV
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* add the start code point to the USet */
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const USetAdder *sa=(const USetAdder *)context;
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    sa->add(sa->set, start);
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic uint32_t U_CALLCONV
43327f654740f2a26ad62a5c155af9199af9e69b889clairehosegmentStarterMapper(const void * /*context*/, uint32_t value) {
43427f654740f2a26ad62a5c155af9199af9e69b889claireho    return value&CANON_NOT_SEGMENT_STARTER;
43527f654740f2a26ad62a5c155af9199af9e69b889claireho}
43627f654740f2a26ad62a5c155af9199af9e69b889claireho
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid
440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusNormalizer2Impl::addLcccChars(UnicodeSet &set) const {
441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /* add the start code point of each same-value range of each trie */
442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LcccContext context(*this, set);
443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    utrie2_enum(normTrie, NULL, enumLcccRange, &context);
444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
44727f654740f2a26ad62a5c155af9199af9e69b889clairehoNormalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* add the start code point of each same-value range of each trie */
449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    PropertyStartsContext context(*this, sa);
450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context);
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* add Hangul LV syllables and LV+1 because of skippables */
45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sa->add(sa->set, c);
45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        sa->add(sa->set, c+1);
45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
46027f654740f2a26ad62a5c155af9199af9e69b889clairehovoid
46127f654740f2a26ad62a5c155af9199af9e69b889clairehoNormalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
46227f654740f2a26ad62a5c155af9199af9e69b889claireho    /* add the start code point of each same-value range of the canonical iterator data trie */
46327f654740f2a26ad62a5c155af9199af9e69b889claireho    if(ensureCanonIterData(errorCode)) {
46427f654740f2a26ad62a5c155af9199af9e69b889claireho        // currently only used for the SEGMENT_STARTER property
46559d709d503bab6e2b61931737e662dd293b40578ccornelius        utrie2_enum(fCanonIterData->trie, segmentStarterMapper, enumPropertyStartsRange, sa);
46627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
46727f654740f2a26ad62a5c155af9199af9e69b889claireho}
46827f654740f2a26ad62a5c155af9199af9e69b889claireho
46950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
47050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src,
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                UChar32 minNeedDataCP,
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                ReorderingBuffer *buffer,
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                UErrorCode &errorCode) const {
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Make some effort to support NUL-terminated strings reasonably.
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Take the part of the fast quick check loop that does not look up
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // data and check the first part of the string.
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // After this prefix, determine the string length to simplify the rest
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // of the code.
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc=src;
48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar c;
48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while((c=*src++)<minNeedDataCP && c!=0) {}
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Back out the last character for full processing.
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Copy this prefix.
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(--src!=prevSrc) {
48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(buffer!=NULL) {
48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer->appendZeroCC(prevSrc, src, errorCode);
48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return src;
49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUnicodeString &
493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusNormalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,
494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UErrorCode &errorCode) const {
495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) {
496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dest.setToBogus();
497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return dest;
498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *sArray=src.getBuffer();
500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(&dest==&src || sArray==NULL) {
501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dest.setToBogus();
503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return dest;
504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    decompose(sArray, sArray+src.length(), dest, src.length(), errorCode);
506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return dest;
507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusNormalizer2Impl::decompose(const UChar *src, const UChar *limit,
511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UnicodeString &dest,
512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           int32_t destLengthEstimate,
513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UErrorCode &errorCode) const {
514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(destLengthEstimate<0 && limit!=NULL) {
515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        destLengthEstimate=(int32_t)(limit-src);
516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    dest.remove();
518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ReorderingBuffer buffer(*this, dest);
519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(buffer.init(destLengthEstimate, errorCode)) {
520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        decompose(src, limit, &buffer, errorCode);
521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
52450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Dual functionality:
52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer!=NULL: normalize
52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer==NULL: isNormalized/spanQuickCheckYes
52750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
52850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::decompose(const UChar *src, const UChar *limit,
52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           ReorderingBuffer *buffer,
53050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           UErrorCode &errorCode) const {
53150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 minNoCP=minDecompNoCP;
53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(limit==NULL) {
53350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
53450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(U_FAILURE(errorCode)) {
53550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return src;
53650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
53750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit=u_strchr(src, 0);
53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
53950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
54050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc;
54150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=0;
54250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16=0;
54350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // only for quick check
54550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevBoundary=src;
54650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t prevCC=0;
54750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
54850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
54950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // count code units below the minimum or with irrelevant data for the quick check
55050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(prevSrc=src; src!=limit;) {
55150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( (c=*src)<minNoCP ||
55250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
55350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
55450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
55550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(!U16_IS_SURROGATE(c)) {
55650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
55750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c2;
55950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_SURROGATE_LEAD(c)) {
56050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
56150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c, c2);
56250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
56350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else /* trail surrogate */ {
56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
56550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --src;
56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c2, c);
56750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    src+=U16_LENGTH(c);
57150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
57350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
57650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // copy these code units all at once
57750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=prevSrc) {
57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(buffer!=NULL) {
57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {
58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
58250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC=0;
58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary=src;
58550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
58650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
58750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src==limit) {
58850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
58950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
59050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
59150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Check one above-minimum, relevant code point.
59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src+=U16_LENGTH(c);
59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(buffer!=NULL) {
59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!decompose(c, norm16, *buffer, errorCode)) {
59550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(isDecompYes(norm16)) {
59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                uint8_t cc=getCCFromYesOrMaybe(norm16);
60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(prevCC<=cc || cc==0) {
60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevCC=cc;
60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(cc<=1) {
60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        prevBoundary=src;
60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
60550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
60750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
60850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return prevBoundary;  // "no" or cc out of order
60950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
61050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
61150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return src;
61250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
61350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
61450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Decompose a short piece of text which is likely to contain characters that
61550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// fail the quick check loop and/or where the quick check loop's overhead
61650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// is unlikely to be amortized.
61750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Called by the compose() and makeFCD() implementations.
61850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,
61950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                      ReorderingBuffer &buffer,
62050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                      UErrorCode &errorCode) const {
62150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(src<limit) {
62250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
62350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t norm16;
62450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
62550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(!decompose(c, norm16, buffer, errorCode)) {
62650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
62750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
62850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
62950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
63050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
63150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
63250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
63350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                 ReorderingBuffer &buffer,
63450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                 UErrorCode &errorCode) const {
63550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Only loops for 1:1 algorithmic mappings.
63650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // get the decomposition and the lead and trail cc's
63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isDecompYes(norm16)) {
63950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c does not decompose
64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode);
64150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isHangul(norm16)) {
64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Hangul syllable: decompose algorithmically
64350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar jamos[3];
64450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode);
64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=getNorm16(c);
64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
64950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
65050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
651103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            uint16_t firstUnit=*mapping;
65250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t length=firstUnit&MAPPING_LENGTH_MASK;
65350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint8_t leadCC, trailCC;
65450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            trailCC=(uint8_t)(firstUnit>>8);
65550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
656103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                leadCC=(uint8_t)(*(mapping-1)>>8);
65750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
65850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                leadCC=0;
65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
660103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return buffer.append((const UChar *)mapping+1, length, leadCC, trailCC, errorCode);
66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
66250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
66350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
66450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
66550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
66650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const {
66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *decomp=NULL;
66850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16;
66950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
67050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
67150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c does not decompose
67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return decomp;
67350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isHangul(norm16)) {
67450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Hangul syllable: decompose algorithmically
67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            length=Hangul::decompose(c, buffer);
67650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return buffer;
67750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
67850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
67950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            decomp=buffer;
68050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            length=0;
68150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_APPEND_UNSAFE(buffer, length, c);
68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
68350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
68450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
685103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            length=*mapping&MAPPING_LENGTH_MASK;
686103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return (const UChar *)mapping+1;
687103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
688103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
689103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
690103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
691103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
692103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// so that a raw mapping fits that consists of one unit ("rm0")
693103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// plus all but the first two code units of the normal mapping.
694103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK.
695103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst UChar *
696103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusNormalizer2Impl::getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const {
697103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // We do not loop in this method because an algorithmic mapping itself
698103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    // becomes a final result rather than having to be decomposed recursively.
699103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint16_t norm16;
700103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
701103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // c does not decompose
702103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return NULL;
703103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else if(isHangul(norm16)) {
704103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // Hangul syllable: decompose algorithmically
705103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        Hangul::getRawDecomposition(c, buffer);
706103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        length=2;
707103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return buffer;
708103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else if(isDecompNoAlgorithmic(norm16)) {
709103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        c=mapAlgorithmic(c, norm16);
710103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        length=0;
711103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        U16_APPEND_UNSAFE(buffer, length, c);
712103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return buffer;
713103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else {
714103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        // c decomposes, get everything from the variable-length extra data
715103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        const uint16_t *mapping=getMapping(norm16);
716103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        uint16_t firstUnit=*mapping;
717103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        int32_t mLength=firstUnit&MAPPING_LENGTH_MASK;  // length of normal mapping
718103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(firstUnit&MAPPING_HAS_RAW_MAPPING) {
719103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
720103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
721103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1;
722103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            uint16_t rm0=*rawMapping;
723103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(rm0<=MAPPING_LENGTH_MASK) {
724103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                length=rm0;
725103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return (const UChar *)rawMapping-rm0;
726103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else {
727103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                // Copy the normal mapping and replace its first two code units with rm0.
728103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                buffer[0]=(UChar)rm0;
729103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2);
730103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                length=mLength-1;
731103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return buffer;
73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
733103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
734103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            length=mLength;
735103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return (const UChar *)mapping+1;
73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
73850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
74050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                         UBool doDecompose,
742b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                         UnicodeString &safeMiddle,
74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                         ReorderingBuffer &buffer,
74450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                         UErrorCode &errorCode) const {
745b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    buffer.copyReorderableSuffixTo(safeMiddle);
74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(doDecompose) {
74750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        decompose(src, limit, &buffer, errorCode);
74850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
75050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Just merge the strings at the boundary.
75150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ForwardUTrie2StringIterator iter(normTrie, src, limit);
75250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t firstCC, prevCC, cc;
75350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    firstCC=prevCC=cc=getCC(iter.next16());
75450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(cc!=0) {
75550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        prevCC=cc;
75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cc=getCC(iter.next16());
75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
759b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        limit=u_strchr(iter.codePointStart, 0);
760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
76154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
76254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) {
76350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        buffer.appendZeroCC(iter.codePointStart, limit, errorCode);
76454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    }
76550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
76650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
76750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Note: hasDecompBoundary() could be implemented as aliases to
76850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
76950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// at the cost of building the FCD trie for a decomposition normalizer.
77050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasDecompBoundary(UChar32 c, UBool before) const {
77150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
77250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(c<minDecompNoCP) {
77350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
77450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
77550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t norm16=getNorm16(c);
77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
77750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
77850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(norm16>MIN_NORMAL_MAYBE_YES) {
77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;  // ccc!=0
78050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
78150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
78250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
78350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
78450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
785103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            uint16_t firstUnit=*mapping;
78650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
78750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
78850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
78950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!before) {
79050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // decomp after-boundary: same as hasFCDBoundaryAfter(),
79150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // fcd16<=1 || trailCC==0
79250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(firstUnit>0x1ff) {
79350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;  // trailCC>1
79450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
79550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(firstUnit<=0xff) {
79650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return TRUE;  // trailCC==0
79750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
79850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // if(trailCC==1) test leadCC==0, same as checking for before-boundary
79950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
80050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // TRUE if leadCC==0 (hasFCDBoundaryBefore())
801103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;
80250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
80350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
80450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
80550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
80650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
80750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Finds the recomposition result for
80850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a forward-combining "lead" character,
80950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * specified with a pointer to its compositions list,
81050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and a backward-combining "trail" character.
81150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
81250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If the lead and trail characters combine, then this function returns
81350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the following "compositeAndFwd" value:
81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Bits 21..1  composite character
81550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Bit      0  set if the composite is a forward-combining starter
81650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * otherwise it returns -1.
81750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
81850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The compositions list has (trail, compositeAndFwd) pair entries,
81950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * encoded as either pairs or triples of 16-bit units.
82050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The last entry has the high bit of its first unit set.
82150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
82250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The list is sorted by ascending trail characters (there are no duplicates).
82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * A linear search is used.
82450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
82550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * See normalizer2impl.h for a more detailed description
82650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * of the compositions list format.
82750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
82850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
82950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t key1, firstUnit;
83050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(trail<COMP_1_TRAIL_LIMIT) {
83150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // trail character is 0..33FF
83250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // result entry may have 2 or 3 units
83350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        key1=(uint16_t)(trail<<1);
83450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(key1>(firstUnit=*list)) {
83550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            list+=2+(firstUnit&COMP_1_TRIPLE);
83650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
83750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
83850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(firstUnit&COMP_1_TRIPLE) {
83950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return ((int32_t)list[1]<<16)|list[2];
84050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
84150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return list[1];
84250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
84350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
84450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
84550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // trail character is 3400..10FFFF
84650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // result entry has 3 units
84750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
84827f654740f2a26ad62a5c155af9199af9e69b889claireho                        (((trail>>COMP_1_TRAIL_SHIFT))&
84927f654740f2a26ad62a5c155af9199af9e69b889claireho                          ~COMP_1_TRIPLE));
85050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
85150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t secondUnit;
85250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(;;) {
85350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(key1>(firstUnit=*list)) {
85450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                list+=2+(firstUnit&COMP_1_TRIPLE);
85550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
85650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(key2>(secondUnit=list[1])) {
85750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(firstUnit&COMP_1_LAST_TUPLE) {
85850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
85950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
86050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        list+=3;
86150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
86250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2];
86450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
86550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
86650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
86750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
86850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
86950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
87050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
87150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
87250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return -1;
87350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
87450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
87527f654740f2a26ad62a5c155af9199af9e69b889claireho/**
87627f654740f2a26ad62a5c155af9199af9e69b889claireho  * @param list some character's compositions list
87727f654740f2a26ad62a5c155af9199af9e69b889claireho  * @param set recursively receives the composites from these compositions
87827f654740f2a26ad62a5c155af9199af9e69b889claireho  */
87927f654740f2a26ad62a5c155af9199af9e69b889clairehovoid Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const {
88027f654740f2a26ad62a5c155af9199af9e69b889claireho    uint16_t firstUnit;
88127f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t compositeAndFwd;
88227f654740f2a26ad62a5c155af9199af9e69b889claireho    do {
88327f654740f2a26ad62a5c155af9199af9e69b889claireho        firstUnit=*list;
88427f654740f2a26ad62a5c155af9199af9e69b889claireho        if((firstUnit&COMP_1_TRIPLE)==0) {
88527f654740f2a26ad62a5c155af9199af9e69b889claireho            compositeAndFwd=list[1];
88627f654740f2a26ad62a5c155af9199af9e69b889claireho            list+=2;
88727f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
88827f654740f2a26ad62a5c155af9199af9e69b889claireho            compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2];
88927f654740f2a26ad62a5c155af9199af9e69b889claireho            list+=3;
89027f654740f2a26ad62a5c155af9199af9e69b889claireho        }
89127f654740f2a26ad62a5c155af9199af9e69b889claireho        UChar32 composite=compositeAndFwd>>1;
89227f654740f2a26ad62a5c155af9199af9e69b889claireho        if((compositeAndFwd&1)!=0) {
89327f654740f2a26ad62a5c155af9199af9e69b889claireho            addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
89427f654740f2a26ad62a5c155af9199af9e69b889claireho        }
89527f654740f2a26ad62a5c155af9199af9e69b889claireho        set.add(composite);
89627f654740f2a26ad62a5c155af9199af9e69b889claireho    } while((firstUnit&COMP_1_LAST_TUPLE)==0);
89727f654740f2a26ad62a5c155af9199af9e69b889claireho}
89827f654740f2a26ad62a5c155af9199af9e69b889claireho
89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Recomposes the buffer text starting at recomposeStartIndex
90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (which is in NFD - decomposed and canonically ordered),
90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and truncates the buffer contents.
90350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
90450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that recomposition never lengthens the text:
90550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Any character consists of either one or two code units;
90650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a composition may contain at most one more code unit than the original starter,
90750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * while the combining mark that is removed has at least one code unit.
90850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
90950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
91050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                UBool onlyContiguous) const {
91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *p=buffer.getStart()+recomposeStartIndex;
91250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *limit=buffer.getLimit();
91350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(p==limit) {
91450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
91550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
91650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
91750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *starter, *pRemove, *q, *r;
91850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint16_t *compositionsList;
91950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c, compositeAndFwd;
92050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16;
92150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t cc, prevCC;
92250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool starterIsSupplementary;
92350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
92450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Some of the following variables are not used until we have a forward-combining starter
92550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // and are only initialized now to avoid compiler warnings.
92650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    compositionsList=NULL;  // used as indicator for whether we have a forward-combining starter
92750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    starter=NULL;
92850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    starterIsSupplementary=FALSE;
92950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    prevCC=0;
93050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
93150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
93250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
93350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cc=getCCFromYesOrMaybe(norm16);
93450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if( // this character combines backward and
93550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMaybe(norm16) &&
93650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // we have seen a starter that combines forward and
93750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            compositionsList!=NULL &&
93850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // the backward-combining character is not blocked
93950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            (prevCC<cc || prevCC==0)
94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ) {
94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(isJamoVT(norm16)) {
94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // c is a Jamo V/T, see if we can compose it with the previous character.
94350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(c<Hangul::JAMO_T_BASE) {
94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
94550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE);
94650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prev<Hangul::JAMO_L_COUNT) {
94750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        pRemove=p-1;
94850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar syllable=(UChar)
94950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            (Hangul::HANGUL_BASE+
95050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
95150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             Hangul::JAMO_T_COUNT);
95250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar t;
95350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
95450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            ++p;
95550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            syllable+=t;  // The next character was a Jamo T.
95650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *starter=syllable;
95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // remove the Jamo V/T
95950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        q=pRemove;
96050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        r=p;
96150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        while(r<limit) {
96250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            *q++=*r++;
96350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        limit=q;
96550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        p=pRemove;
96650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
96750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
96850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                /*
96950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 * No "else" for Jamo T:
97050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 * Since the input is in NFD, there are no Hangul LV syllables that
97150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 * a Jamo T could combine with.
97250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 * All Jamo Ts are combined above when handling Jamo Vs.
97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 */
97450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(p==limit) {
97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
97750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                compositionsList=NULL;
97850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
97950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if((compositeAndFwd=combine(compositionsList, c))>=0) {
98050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The starter and the combining mark (c) do combine.
98150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 composite=compositeAndFwd>>1;
98250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
98350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Replace the starter with the composite, remove the combining mark.
98450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                pRemove=p-U16_LENGTH(c);  // pRemove & p: start & limit of the combining mark
98550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(starterIsSupplementary) {
98650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(U_IS_SUPPLEMENTARY(composite)) {
98750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // both are supplementary
98850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        starter[0]=U16_LEAD(composite);
98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        starter[1]=U16_TRAIL(composite);
99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *starter=(UChar)composite;
99250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The composite is shorter than the starter,
99350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // move the intermediate characters forward one.
99450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        starterIsSupplementary=FALSE;
99550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        q=starter+1;
99650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        r=q+1;
99750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        while(r<pRemove) {
99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            *q++=*r++;
99950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --pRemove;
100150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
100250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else if(U_IS_SUPPLEMENTARY(composite)) {
100350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The composite is longer than the starter,
100450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // move the intermediate characters back one.
100550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starterIsSupplementary=TRUE;
100650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ++starter;  // temporarily increment for the loop boundary
100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    q=pRemove;
100850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    r=++pRemove;
100950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while(starter<q) {
101050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *--r=*--q;
101150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
101250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *starter=U16_TRAIL(composite);
101350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *--starter=U16_LEAD(composite);  // undo the temporary increment
101450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // both are on the BMP
101650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *starter=(UChar)composite;
101750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
101850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
101950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                /* remove the combining mark by moving the following text over it */
102050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(pRemove<p) {
102150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    q=pRemove;
102250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    r=p;
102350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while(r<limit) {
102450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *q++=*r++;
102550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
102650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    limit=q;
102750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    p=pRemove;
102850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
102950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Keep prevCC because we removed the combining mark.
103050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
103150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(p==limit) {
103250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
103350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
103450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Is the composite a starter that combines forward?
103550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(compositeAndFwd&1) {
103650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    compositionsList=
103750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        getCompositionsListForComposite(getNorm16(composite));
103850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
103950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    compositionsList=NULL;
104050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
104250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // We combined; continue with looking for compositions.
104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
104550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
104650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
104750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // no combination this time
104850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        prevCC=cc;
104950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(p==limit) {
105050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // If c did not combine, then check if it is a starter.
105450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(cc==0) {
105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Found a new starter.
105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) {
105750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // It may combine with something, prepare for it.
105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U_IS_BMP(c)) {
105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starterIsSupplementary=FALSE;
106050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starter=p-1;
106150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
106250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starterIsSupplementary=TRUE;
106350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    starter=p-2;
106450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
106550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
106650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(onlyContiguous) {
106750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // FCC: no discontiguous compositions; any intervening character blocks.
106850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            compositionsList=NULL;
106950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
107050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
107150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    buffer.setReorderingLimit(limit);
107250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
107350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1074103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusUChar32
1075103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusNormalizer2Impl::composePair(UChar32 a, UChar32 b) const {
1076103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint16_t norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16=0
1077103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const uint16_t *list;
1078103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(isInert(norm16)) {
1079103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return U_SENTINEL;
1080103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else if(norm16<minYesNoMappingsOnly) {
1081103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(isJamoL(norm16)) {
1082103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            b-=Hangul::JAMO_V_BASE;
1083103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(0<=b && b<Hangul::JAMO_V_COUNT) {
1084103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return
1085103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    (Hangul::HANGUL_BASE+
1086103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                     ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)*
1087103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                     Hangul::JAMO_T_COUNT);
1088103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else {
1089103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return U_SENTINEL;
1090103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
1091103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else if(isHangul(norm16)) {
1092103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            b-=Hangul::JAMO_T_BASE;
1093103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(Hangul::isHangulWithoutJamoT(a) && 0<b && b<Hangul::JAMO_T_COUNT) {  // not b==0!
1094103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return a+b;
1095103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else {
1096103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return U_SENTINEL;
1097103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
1098103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else {
1099103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // 'a' has a compositions list in extraData
1100103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            list=extraData+norm16;
1101103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if(norm16>minYesNo) {  // composite 'a' has both mapping & compositions list
1102103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                list+=  // mapping pointer
1103103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    1+  // +1 to skip the first unit with the mapping lenth
1104103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    (*list&MAPPING_LENGTH_MASK);  // + mapping length
1105103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
1106103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
1107103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) {
1108103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return U_SENTINEL;
1109103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    } else {
1110103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        list=maybeYesCompositions+norm16-minMaybeYes;
1111103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
1112103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(b<0 || 0x10ffff<b) {  // combine(list, b) requires a valid code point b
1113103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return U_SENTINEL;
1114103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
1115103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
1116103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return combine(list, b)>>1;
1117103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#else
1118103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    int32_t compositeAndFwd=combine(list, b);
1119103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL;
1120103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif
1121103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius}
1122103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
112350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
112450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// doCompose: normalize
112550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// !doCompose: isNormalized (buffer must be empty and initialized)
//
// Overview: walks the source text, skipping quickly over code units that are
// below minCompNoMaybeCP or whose norm16 passes isCompYesAndZeroCC(). Every
// other character is handled by one of: algorithmic Hangul Jamo composition,
// a combining-class ordering check, or the slow path
// (decomposeShort() followed by recompose() on the span between two
// composition boundaries).
//
// Parameters:
//   src, limit      Source text [src..limit[. limit==NULL means that the
//                   text is NUL-terminated.
//   onlyContiguous  Enables the additional "FCC" trailing-ccc test below and
//                   is passed through to recompose().
//   doCompose       TRUE: the result is appended to buffer.
//                   FALSE: test only; FALSE is returned at the first place
//                   where composing would change the text.
//   buffer          Destination for doCompose; in test-only mode it is still
//                   used as scratch space by the slow path.
//   errorCode       ICU error code, passed on to the helpers that take one.
//
// Returns FALSE if errorCode indicates a failure after the NUL-terminated
// prefix handling, or (test-only mode) if the text is found not to be
// normalized. Returns TRUE when the main loop ends.
// NOTE(review): when a buffer append or decomposeShort() fails, the loop is
// left via break and TRUE is still returned -- callers presumably need to
// check errorCode as well; confirm against the call sites.
112650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool
112750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::compose(const UChar *src, const UChar *limit,
112850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UBool onlyContiguous,
112950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UBool doCompose,
113050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         ReorderingBuffer &buffer,
113150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UErrorCode &errorCode) const {
113250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /*
113350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * prevBoundary points to the last character before the current one
113450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * that has a composition boundary before it with ccc==0 and quick check "yes".
113550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Keeping track of prevBoundary saves us looking for a composition boundary
113650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * when we find a "no" or "maybe".
113750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
113850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * When we back out from prevSrc back to prevBoundary,
113950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * then we also remove those same characters (which had been simply copied
114050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * or canonically-order-inserted) from the ReorderingBuffer.
114150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Therefore, at all times, the [prevBoundary..prevSrc[ source units
114250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * must correspond 1:1 to destination units at the end of the destination buffer.
114350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
114450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevBoundary=src;
    // Local copy of the member threshold: code units below it are accepted
    // in the fast loop without a trie lookup.
114527f654740f2a26ad62a5c155af9199af9e69b889claireho    UChar32 minNoMaybeCP=minCompNoMaybeCP;
    // NUL-terminated input: let the helper deal with the leading low code
    // units first (it gets the buffer only in compose mode), then find the
    // terminator so that the rest of the function can work with a real limit.
114627f654740f2a26ad62a5c155af9199af9e69b889claireho    if(limit==NULL) {
114727f654740f2a26ad62a5c155af9199af9e69b889claireho        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
114827f654740f2a26ad62a5c155af9199af9e69b889claireho                                           doCompose ? &buffer : NULL,
114927f654740f2a26ad62a5c155af9199af9e69b889claireho                                           errorCode);
115027f654740f2a26ad62a5c155af9199af9e69b889claireho        if(U_FAILURE(errorCode)) {
115127f654740f2a26ad62a5c155af9199af9e69b889claireho            return FALSE;
115227f654740f2a26ad62a5c155af9199af9e69b889claireho        }
115327f654740f2a26ad62a5c155af9199af9e69b889claireho        if(prevBoundary<src) {
115427f654740f2a26ad62a5c155af9199af9e69b889claireho            // Set prevBoundary to the last character in the prefix.
115527f654740f2a26ad62a5c155af9199af9e69b889claireho            prevBoundary=src-1;
115627f654740f2a26ad62a5c155af9199af9e69b889claireho        }
115727f654740f2a26ad62a5c155af9199af9e69b889claireho        limit=u_strchr(src, 0);
115827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
115927f654740f2a26ad62a5c155af9199af9e69b889claireho
116050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc;
116150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=0;
116250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16=0;
116350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
116450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // only for isNormalized
116550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t prevCC=0;
116650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
116750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
116850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // count code units below the minimum or with irrelevant data for the quick check
116950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(prevSrc=src; src!=limit;) {
117050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( (c=*src)<minNoMaybeCP ||
117150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
117250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
117350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
117450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(!U16_IS_SURROGATE(c)) {
                // A non-surrogate code unit that needs attention:
                // leave the fast loop with c and norm16 set for it.
117550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
117650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
                // Surrogate code unit: try to assemble the full supplementary
                // code point before looking up its norm16 via getNorm16().
117750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c2;
117850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_SURROGATE_LEAD(c)) {
117950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
118050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c, c2);
118150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
118250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else /* trail surrogate */ {
                    // Pair the trail with a lead surrogate just before it, but only
                    // if that lead lies inside the current span (prevSrc<src);
                    // src then moves back to the start of the pair.
118350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
118450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --src;
118550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c2, c);
118650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
118750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
118850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
118950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    src+=U16_LENGTH(c);
119050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
119150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
119250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
119350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
119450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
119550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // copy these code units all at once
119650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=prevSrc) {
119750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(doCompose) {
119850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!buffer.appendZeroCC(prevSrc, src, errorCode)) {
119950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
120050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
120150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
120250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC=0;
120350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
120450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(src==limit) {
120550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
120650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
120750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Set prevBoundary to the last character in the quick check loop.
120850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=src-1;
            // If that last character is a surrogate pair, point at its lead unit.
120950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
121050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_IS_LEAD(*(prevBoundary-1))
121150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
121250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                --prevBoundary;
121350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
121450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // The start of the current character (c).
121550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevSrc=src;
121650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(src==limit) {
121750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
121850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
121950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
        // Step over the current character c; prevSrc stays at its start.
122050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src+=U16_LENGTH(c);
122150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
122250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
122350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
122450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * or has ccc!=0.
122550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Check for Jamo V/T, then for regular characters.
122650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * c is not a Hangul syllable or Jamo L because those have "yes" properties.
122750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
        // prevBoundary!=prevSrc: only look at the preceding code unit when it
        // belongs to the [prevBoundary..prevSrc[ span (see the pointer diagram below).
122850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
122950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar prev=*(prevSrc-1);
123050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UBool needToDecompose=FALSE;
123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(c<Hangul::JAMO_T_BASE) {
123250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                // Range check with one comparison: after the subtraction and the
                // cast to UChar (an unsigned 16-bit type in ICU), values below the
                // base wrap around to large numbers and fail the < test.
123350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prev=(UChar)(prev-Hangul::JAMO_L_BASE);
123450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(prev<Hangul::JAMO_L_COUNT) {
123550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(!doCompose) {
123650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return FALSE;
123750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
                    // LV syllable: HANGUL_BASE + (L index * V count + V index) * T count.
123850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar syllable=(UChar)
123950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (Hangul::HANGUL_BASE+
124050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
124150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         Hangul::JAMO_T_COUNT);
124250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar t;
                    // Same wrap-around range test for a following code unit as Jamo T offset.
124350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(src!=limit && (t=(UChar)(*src-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
124450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        ++src;
124550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        syllable+=t;  // The next character was a Jamo T.
                        // L+V+T consumed: the boundary moves past the T, and the last
                        // character in the buffer (presumably the copied Jamo L) is
                        // replaced by the composed syllable.
124650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        prevBoundary=src;
124750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        buffer.setLastChar(syllable);
124850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        continue;
124950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
125050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // If we see L+V+x where x!=T then we drop to the slow path,
125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // decompose and recompose.
125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // This is to deal with NFKC finding normal L and V but a
125350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // compatibility variant of a T. We need to either fully compose that
125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // combination here (which would complicate the code and may not work
125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // with strange custom data) or use the slow path -- or else our replacing
125650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // two input characters (L+V) with one output character (LV syllable)
125750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // would violate the invariant that [prevBoundary..prevSrc[ has the same
125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // length as what we appended to the buffer since prevBoundary.
125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    needToDecompose=TRUE;
126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(Hangul::isHangulWithoutJamoT(prev)) {
126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // c is a Jamo Trailing consonant,
126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // compose with previous Hangul LV that does not contain a Jamo T.
126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!doCompose) {
126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                buffer.setLastChar((UChar)(prev+c-Hangul::JAMO_T_BASE));
126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary=src;
126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!needToDecompose) {
127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The Jamo V/T did not compose into a Hangul syllable.
127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(doCompose) {
127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(!buffer.appendBMP((UChar)c, 0, errorCode)) {
127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevCC=0;
127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Source buffer pointers:
128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *  all done      quick check   current char  not yet
128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                "yes" but     (c)           processed
128850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                may combine
128950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                forward
129050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * [-------------[-------------[-------------[-------------[
129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * |             |             |             |             |
129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * orig. src     prevBoundary  prevSrc       src           limit
129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Destination buffer pointers inside the ReorderingBuffer:
129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *  all done      might take    not filled yet
129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                characters for
129950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                reordering
130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * [-------------[-------------[-------------[
130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * |             |             |             |
130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * start         reorderStart  limit         |
130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *                             +remainingCap.+
130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
        // norm16 at or above MIN_YES_YES_WITH_CC: the low 8 bits of norm16
        // are taken as the combining class of c.
130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(norm16>=MIN_YES_YES_WITH_CC) {
130650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint8_t cc=(uint8_t)norm16;  // cc!=0
130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( onlyContiguous &&  // FCC
130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (doCompose ? buffer.getLastCC() : prevCC)==0 &&
130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary<prevSrc &&
131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // passed the quick check "yes && ccc==0" test.
131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Check whether the last character was a "yesYes" or a "yesNo".
131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If a "yesNo", then we get its trailing ccc from its
131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // mapping and check for canonical order.
131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // All other cases are ok.
131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fails FCD test, need to decompose and contiguously recompose.
                // In compose mode this branch has no continue: control falls
                // through to the slow path after this if/else chain.
132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!doCompose) {
132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(doCompose) {
132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(!buffer.append(c, cc, errorCode)) {
132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
            // Test-only mode: combining classes must not decrease from one
            // character to the next; otherwise the text is not normalized.
132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(prevCC<=cc) {
132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC=cc;
133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
        // Test-only mode and c is neither "maybe" nor has a nonzero cc: per the
        // comment after the fast loop that leaves a "noNo" (has a mapping),
        // so the text is reported as not normalized.
133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Find appropriate boundaries around this character,
134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * decompose the source text from between the boundaries,
134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * and recompose it.
134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         *
134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * We may need to remove the last few characters from the ReorderingBuffer
134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * to account for source text that was copied or appended
134550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * but needs to take part in the recomposition.
134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Find the last composition boundary in [prevBoundary..src[.
135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * It is either the decomposition of the current character (at prevSrc),
135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * or prevBoundary.
135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(hasCompBoundaryBefore(c, norm16)) {
135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=prevSrc;
135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(doCompose) {
            // Take back the output for [prevBoundary..prevSrc[; this relies on
            // the 1:1 source/destination invariant described at the top.
135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer.removeSuffix((int32_t)(prevSrc-prevBoundary));
135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Find the next composition boundary in [src..limit[ -
136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // modifies src to point to the next starter.
        // NOTE(review): the (UChar *) cast drops const only to be assigned
        // back to a const UChar *; it looks unnecessary -- confirm.
136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src=(UChar *)findNextCompBoundary(src, limit);
136250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t recomposeStartIndex=buffer.length();
136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(!decomposeShort(prevBoundary, src, buffer, errorCode)) {
136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        recompose(buffer, recomposeStartIndex, onlyContiguous);
        // Test-only mode: the recomposed text must equal the source span,
        // otherwise the input was not normalized. The buffer is then cleared
        // (presumably emptied by remove()) so it can serve as scratch space again.
136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(!doCompose) {
137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!buffer.equals(prevBoundary, src)) {
137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer.remove();
137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCC=0;
137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
137650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Move to the next starter. We never need to look back before this point again.
137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        prevBoundary=src;
137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return TRUE;
138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Very similar to compose(): Make the same changes in both places if relevant.
138450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// pQCResult==NULL: spanQuickCheckYes
138550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES)
138650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
138750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
138850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                   UBool onlyContiguous,
138950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                   UNormalizationCheckResult *pQCResult) const {
139027f654740f2a26ad62a5c155af9199af9e69b889claireho    /*
139127f654740f2a26ad62a5c155af9199af9e69b889claireho     * prevBoundary points to the last character before the current one
139227f654740f2a26ad62a5c155af9199af9e69b889claireho     * that has a composition boundary before it with ccc==0 and quick check "yes".
139327f654740f2a26ad62a5c155af9199af9e69b889claireho     */
139427f654740f2a26ad62a5c155af9199af9e69b889claireho    const UChar *prevBoundary=src;
139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 minNoMaybeCP=minCompNoMaybeCP;
139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(limit==NULL) {
139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode errorCode=U_ZERO_ERROR;
139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode);
139927f654740f2a26ad62a5c155af9199af9e69b889claireho        if(prevBoundary<src) {
140027f654740f2a26ad62a5c155af9199af9e69b889claireho            // Set prevBoundary to the last character in the prefix.
140127f654740f2a26ad62a5c155af9199af9e69b889claireho            prevBoundary=src-1;
140227f654740f2a26ad62a5c155af9199af9e69b889claireho        }
140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit=u_strchr(src, 0);
140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc;
140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=0;
140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16=0;
140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t prevCC=0;
141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // count code units below the minimum or with irrelevant data for the quick check
141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(prevSrc=src;;) {
141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(src==limit) {
141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return src;
141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( (c=*src)<minNoMaybeCP ||
141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
141950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
142050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(!U16_IS_SURROGATE(c)) {
142250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c2;
142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_SURROGATE_LEAD(c)) {
142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c, c2);
142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else /* trail surrogate */ {
143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        --src;
143250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c=U16_GET_SUPPLEMENTARY(c2, c);
143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
143450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
143650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    src+=U16_LENGTH(c);
143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=prevSrc) {
144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Set prevBoundary to the last character in the quick check loop.
144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=src-1;
144550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_IS_LEAD(*(prevBoundary-1))
144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
144850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                --prevBoundary;
144950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
145050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCC=0;
145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // The start of the current character (c).
145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevSrc=src;
145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src+=U16_LENGTH(c);
145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * or has ccc!=0.
146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isMaybeOrNonZeroCC(norm16)) {
146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint8_t cc=getCCFromYesOrMaybe(norm16);
146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if( onlyContiguous &&  // FCC
146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                cc!=0 &&
146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC==0 &&
146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary<prevSrc &&
146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // prevCC==0 && prevBoundary<prevSrc tell us that
146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // passed the quick check "yes && ccc==0" test.
147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Check whether the last character was a "yesYes" or a "yesNo".
147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If a "yesNo", then we get its trailing ccc from its
147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // mapping and check for canonical order.
147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // All other cases are ok.
147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ) {
147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fails FCD test.
147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(prevCC<=cc || cc==0) {
147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevCC=cc;
147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(norm16<MIN_YES_YES_WITH_CC) {
148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(pQCResult!=NULL) {
148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *pQCResult=UNORM_MAYBE;
148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return prevBoundary;
148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continue;
148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(pQCResult!=NULL) {
149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pQCResult=UNORM_NO;
149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return prevBoundary;
149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
149650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,
149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UBool doCompose,
149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UBool onlyContiguous,
1499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                       UnicodeString &safeMiddle,
150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       ReorderingBuffer &buffer,
150150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UErrorCode &errorCode) const {
150250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!buffer.isEmpty()) {
150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *firstStarterInSrc=findNextCompBoundary(src, limit);
150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=firstStarterInSrc) {
150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                                    buffer.getLimit());
1507b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
1508b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UnicodeString middle(lastStarterInDest, destSuffixLength);
1509b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            buffer.removeSuffix(destSuffixLength);
1510b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            safeMiddle=middle;
151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            middle.append(src, (int32_t)(firstStarterInSrc-src));
151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *middleStart=middle.getBuffer();
151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            compose(middleStart, middleStart+middle.length(), onlyContiguous,
151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    TRUE, buffer, errorCode);
151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(U_FAILURE(errorCode)) {
151650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return;
151750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
151850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            src=firstStarterInSrc;
151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
152150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(doCompose) {
152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
152350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
1524b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
1525b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            limit=u_strchr(src, 0);
1526b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        buffer.appendZeroCC(src, limit, errorCode);
152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Does c have a composition boundary before it?
153350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * True if its decomposition begins with a character that has
153450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (isCompYesAndZeroCC()) so we need not decompose.
153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
153850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isCompYesAndZeroCC(norm16)) {
154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isMaybeOrNonZeroCC(norm16)) {
154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm16=getNorm16(c);
154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
1550103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            uint16_t firstUnit=*mapping;
155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
1554103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD) && (*(mapping-1)&0xff00)) {
155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;  // non-zero leadCC
155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
1557103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            int32_t i=1;  // skip over the firstUnit
155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar32 c;
155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT_UNSAFE(mapping, i, c);
156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return isCompYesAndZeroCC(getNorm16(c));
156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
156550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool Normalizer2Impl::hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const {
156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uint16_t norm16=getNorm16(c);
156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(isInert(norm16)) {
156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return TRUE;
157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(norm16<=minYesNo) {
1571103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Hangul: norm16==minYesNo
1572103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Hangul LVT has a boundary after it.
157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Hangul LV and non-inert yesYes characters combine forward.
157450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return isHangul(norm16) && !Hangul::isHangulWithoutJamoT((UChar)c);
157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) {
157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
157750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(isDecompNoAlgorithmic(norm16)) {
157850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c=mapAlgorithmic(c, norm16);
157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data.
158150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // If testInert, then c must be a yesNo character which has lccc=0,
158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // otherwise it could be a noNo.
158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint16_t firstUnit=*mapping;
158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // TRUE if
1586103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            //   not MAPPING_NO_COMP_BOUNDARY_AFTER
1587103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            //     (which is set if
1588103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            //       c is not deleted, and
1589103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            //       it and its decomposition do not combine forward, and it has a starter)
1590103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            //   and if FCC then trailCC<=1
159150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return
1592103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                (firstUnit&MAPPING_NO_COMP_BOUNDARY_AFTER)==0 &&
159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (!onlyContiguous || firstUnit<=0x1ff);
159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
159850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p) const {
159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    BackwardUTrie2StringIterator iter(normTrie, start, p);
160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16;
160150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm16=iter.previous16();
160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(!hasCompBoundaryBefore(iter.codePoint, norm16));
160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // but that's probably not worth the extra cost.
160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return iter.codePointStart;
160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
160850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
160950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit) const {
161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ForwardUTrie2StringIterator iter(normTrie, p, limit);
161150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t norm16;
161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    do {
161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        norm16=iter.next16();
161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } while(!hasCompBoundaryBefore(iter.codePoint, norm16));
161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return iter.codePointStart;
161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1618103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Note: normalizer2impl.cpp r30982 (2011-nov-27)
1619103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// still had getFCDTrie() which built and cached an FCD trie.
1620103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// That provided faster access to FCD data than getFCD16FromNormData()
1621103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// but required synchronization and consumed some 10kB of heap memory
1622103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// in any process that uses FCD (e.g., via collation).
1623103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// tccc180[] and smallFCD[] are intended to help with any loss of performance,
1624103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// at least for Latin & CJK.
162550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1626103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius// Gets the FCD value from the regular normalization data.
1627103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusuint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Only loops for 1:1 algorithmic mappings.
162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
1630103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        uint16_t norm16=getNorm16(c);
1631103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(norm16<=minYesNo) {
163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // no decomposition or Hangul syllable, all zeros
1633103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return 0;
1634103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else if(norm16>=MIN_NORMAL_MAYBE_YES) {
1635103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // combining mark
1636103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            norm16&=0xff;
1637103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return norm16|(norm16<<8);
1638103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else if(norm16>=minMaybeYes) {
1639103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return 0;
1640103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        } else if(isDecompNoAlgorithmic(norm16)) {
1641103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            c=mapAlgorithmic(c, norm16);
164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // c decomposes, get everything from the variable-length extra data
164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint16_t *mapping=getMapping(norm16);
164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uint16_t firstUnit=*mapping;
164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
164750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // A character that is deleted (maps to an empty string) must
164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // get the worst-case lccc and tccc values because arbitrary
164950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // characters on both sides will become adjacent.
1650103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return 0x1ff;
165150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
1652103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                norm16=firstUnit>>8;  // tccc
165350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
1654103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    norm16|=*(mapping-1)&0xff00;  // lccc
165550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
1656103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                return norm16;
165750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
165850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
165950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
166050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
166150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
166250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Dual functionality:
166350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer!=NULL: normalize
166450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
166550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *
166650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
166750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         ReorderingBuffer *buffer,
166850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UErrorCode &errorCode) const {
166927f654740f2a26ad62a5c155af9199af9e69b889claireho    // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
167027f654740f2a26ad62a5c155af9199af9e69b889claireho    // Similar to the prevBoundary in the compose() implementation.
167127f654740f2a26ad62a5c155af9199af9e69b889claireho    const UChar *prevBoundary=src;
167227f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t prevFCD16=0;
167350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(limit==NULL) {
167450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src=copyLowPrefixFromNulTerminated(src, MIN_CCC_LCCC_CP, buffer, errorCode);
167550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(U_FAILURE(errorCode)) {
167650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return src;
167750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
167827f654740f2a26ad62a5c155af9199af9e69b889claireho        if(prevBoundary<src) {
167927f654740f2a26ad62a5c155af9199af9e69b889claireho            prevBoundary=src;
168027f654740f2a26ad62a5c155af9199af9e69b889claireho            // We know that the previous character's lccc==0.
168127f654740f2a26ad62a5c155af9199af9e69b889claireho            // Fetching the fcd16 value was deferred for this below-U+0300 code point.
1682103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            prevFCD16=getFCD16(*(src-1));
168327f654740f2a26ad62a5c155af9199af9e69b889claireho            if(prevFCD16>1) {
168427f654740f2a26ad62a5c155af9199af9e69b889claireho                --prevBoundary;
168527f654740f2a26ad62a5c155af9199af9e69b889claireho            }
168627f654740f2a26ad62a5c155af9199af9e69b889claireho        }
168750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        limit=u_strchr(src, 0);
168850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
168950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
169050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Note: In this function we use buffer->appendZeroCC() because we track
169150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // the lead and trail combining classes here, rather than leaving it to
169250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // the ReorderingBuffer.
169350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // The exception is the call to decomposeShort() which uses the buffer
169450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // in the normal way.
169550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
169650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *prevSrc;
169750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c=0;
169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint16_t fcd16=0;
169950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // count code units with lccc==0
170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for(prevSrc=src; src!=limit;) {
170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((c=*src)<MIN_CCC_LCCC_CP) {
170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevFCD16=~c;
170550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
1706103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
1707103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                prevFCD16=0;
170850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
170950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
1710103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if(U16_IS_SURROGATE(c)) {
1711103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar c2;
1712103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if(U16_IS_SURROGATE_LEAD(c)) {
1713103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
1714103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            c=U16_GET_SUPPLEMENTARY(c, c2);
1715103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        }
1716103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    } else /* trail surrogate */ {
1717103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
1718103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            --src;
1719103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            c=U16_GET_SUPPLEMENTARY(c2, c);
1720103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        }
172150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
172250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
1723103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if((fcd16=getFCD16FromNormData(c))<=0xff) {
172450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevFCD16=fcd16;
172550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    src+=U16_LENGTH(c);
172650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
172750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
172850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
172950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
173050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
173150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // copy these code units all at once
173250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=prevSrc) {
173350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) {
173450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
173550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
173650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(src==limit) {
173750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
173850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
173950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=src;
174050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // We know that the previous character's lccc==0.
174150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(prevFCD16<0) {
174250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetching the fcd16 value was deferred for this below-U+0300 code point.
1743103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UChar32 prev=~prevFCD16;
1744103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
174550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(prevFCD16>1) {
174650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    --prevBoundary;
174750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
174850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
174950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const UChar *p=src-1;
175050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) {
175150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    --p;
175250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Need to fetch the previous character's FCD value because
175350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // prevFCD16 was just for the trail surrogate code point.
1754103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));
175550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
175650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
175750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(prevFCD16>1) {
175850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    prevBoundary=p;
175950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
176050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
176150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // The start of the current character (c).
176250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevSrc=src;
176350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(src==limit) {
176450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
176550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
176650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
176750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        src+=U16_LENGTH(c);
176850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
176950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Check for proper order, and decompose locally if necessary.
177050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if((prevFCD16&0xff)<=(fcd16>>8)) {
177150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // proper order: prev tccc <= current lccc
177250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((fcd16&0xff)<=1) {
177350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                prevBoundary=src;
177450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
177550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) {
177650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
177750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
177850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevFCD16=fcd16;
177950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            continue;
178050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(buffer==NULL) {
178150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return prevBoundary;  // quick check "no"
178250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
178350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
178450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Back out the part of the source that we copied or appended
178550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * already but is now going to be decomposed.
178650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * prevSrc is set to after what was copied/appended.
178750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
178850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
178950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
179050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Find the part of the source that needs to be decomposed,
179150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * up to the next safe boundary.
179250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
179350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            src=findNextFCDBoundary(src, limit);
179450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
179550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * The source text does not fulfill the conditions for FCD.
179650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Decompose and reorder a limited piece of the text.
179750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
179850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(!decomposeShort(prevBoundary, src, *buffer, errorCode)) {
179950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
180050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
180150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevBoundary=src;
180250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevFCD16=0;
180350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
180450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
180550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return src;
180650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
180750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
180850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit,
180950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UBool doMakeFCD,
1810b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                       UnicodeString &safeMiddle,
181150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       ReorderingBuffer &buffer,
181250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                       UErrorCode &errorCode) const {
181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!buffer.isEmpty()) {
181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
181550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(src!=firstBoundaryInSrc) {
181650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                                    buffer.getLimit());
1818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
1819b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UnicodeString middle(lastBoundaryInDest, destSuffixLength);
1820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            buffer.removeSuffix(destSuffixLength);
1821b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            safeMiddle=middle;
182250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            middle.append(src, (int32_t)(firstBoundaryInSrc-src));
182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *middleStart=middle.getBuffer();
182450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
182550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(U_FAILURE(errorCode)) {
182650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return;
182750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
182850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            src=firstBoundaryInSrc;
182950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
183050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
183150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(doMakeFCD) {
183250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        makeFCD(src, limit, &buffer, errorCode);
183350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
1834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
1835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            limit=u_strchr(src, 0);
1836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
183750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        buffer.appendZeroCC(src, limit, errorCode);
183850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
183950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
184050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
184150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const {
1842103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    while(start<p && previousFCD16(start, p)>0xff) {}
1843103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return p;
184450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
184550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
184650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const {
1847103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    while(p<limit) {
1848103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        const UChar *codePointStart=p;
1849103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        if(nextFCD16(p, limit)<=0xff) {
1850103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return codePointStart;
1851103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        }
1852103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
1853103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    return p;
185450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
185550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
185627f654740f2a26ad62a5c155af9199af9e69b889claireho// CanonicalIterator data -------------------------------------------------- ***
185727f654740f2a26ad62a5c155af9199af9e69b889claireho
185827f654740f2a26ad62a5c155af9199af9e69b889clairehoCanonIterData::CanonIterData(UErrorCode &errorCode) :
185927f654740f2a26ad62a5c155af9199af9e69b889claireho        trie(utrie2_open(0, 0, &errorCode)),
1860103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
186127f654740f2a26ad62a5c155af9199af9e69b889claireho
186227f654740f2a26ad62a5c155af9199af9e69b889clairehoCanonIterData::~CanonIterData() {
186327f654740f2a26ad62a5c155af9199af9e69b889claireho    utrie2_close(trie);
186427f654740f2a26ad62a5c155af9199af9e69b889claireho}
186527f654740f2a26ad62a5c155af9199af9e69b889claireho
186627f654740f2a26ad62a5c155af9199af9e69b889clairehovoid CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
186727f654740f2a26ad62a5c155af9199af9e69b889claireho    uint32_t canonValue=utrie2_get32(trie, decompLead);
186827f654740f2a26ad62a5c155af9199af9e69b889claireho    if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
186927f654740f2a26ad62a5c155af9199af9e69b889claireho        // origin is the first character whose decomposition starts with
187027f654740f2a26ad62a5c155af9199af9e69b889claireho        // the character for which we are setting the value.
187127f654740f2a26ad62a5c155af9199af9e69b889claireho        utrie2_set32(trie, decompLead, canonValue|origin, &errorCode);
187227f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
187327f654740f2a26ad62a5c155af9199af9e69b889claireho        // origin is not the first character, or it is U+0000.
187427f654740f2a26ad62a5c155af9199af9e69b889claireho        UnicodeSet *set;
187527f654740f2a26ad62a5c155af9199af9e69b889claireho        if((canonValue&CANON_HAS_SET)==0) {
187627f654740f2a26ad62a5c155af9199af9e69b889claireho            set=new UnicodeSet;
187727f654740f2a26ad62a5c155af9199af9e69b889claireho            if(set==NULL) {
187827f654740f2a26ad62a5c155af9199af9e69b889claireho                errorCode=U_MEMORY_ALLOCATION_ERROR;
187927f654740f2a26ad62a5c155af9199af9e69b889claireho                return;
188027f654740f2a26ad62a5c155af9199af9e69b889claireho            }
188127f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
188227f654740f2a26ad62a5c155af9199af9e69b889claireho            canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
188327f654740f2a26ad62a5c155af9199af9e69b889claireho            utrie2_set32(trie, decompLead, canonValue, &errorCode);
188427f654740f2a26ad62a5c155af9199af9e69b889claireho            canonStartSets.addElement(set, errorCode);
188527f654740f2a26ad62a5c155af9199af9e69b889claireho            if(firstOrigin!=0) {
188627f654740f2a26ad62a5c155af9199af9e69b889claireho                set->add(firstOrigin);
188727f654740f2a26ad62a5c155af9199af9e69b889claireho            }
188827f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
188927f654740f2a26ad62a5c155af9199af9e69b889claireho            set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)];
189027f654740f2a26ad62a5c155af9199af9e69b889claireho        }
189127f654740f2a26ad62a5c155af9199af9e69b889claireho        set->add(origin);
189227f654740f2a26ad62a5c155af9199af9e69b889claireho    }
189327f654740f2a26ad62a5c155af9199af9e69b889claireho}
189427f654740f2a26ad62a5c155af9199af9e69b889claireho
189527f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CDECL_BEGIN
189627f654740f2a26ad62a5c155af9199af9e69b889claireho
189727f654740f2a26ad62a5c155af9199af9e69b889claireho// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
189859d709d503bab6e2b61931737e662dd293b40578ccornelius//     context: the Normalizer2Impl
189927f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool U_CALLCONV
190027f654740f2a26ad62a5c155af9199af9e69b889clairehoenumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
190159d709d503bab6e2b61931737e662dd293b40578ccornelius    UErrorCode errorCode = U_ZERO_ERROR;
190259d709d503bab6e2b61931737e662dd293b40578ccornelius    if (value != 0) {
190359d709d503bab6e2b61931737e662dd293b40578ccornelius        Normalizer2Impl *impl = (Normalizer2Impl *)context;
190459d709d503bab6e2b61931737e662dd293b40578ccornelius        impl->makeCanonIterDataFromNorm16(
190559d709d503bab6e2b61931737e662dd293b40578ccornelius            start, end, (uint16_t)value, *impl->fCanonIterData, errorCode);
190659d709d503bab6e2b61931737e662dd293b40578ccornelius    }
190759d709d503bab6e2b61931737e662dd293b40578ccornelius    return U_SUCCESS(errorCode);
190827f654740f2a26ad62a5c155af9199af9e69b889claireho}
190927f654740f2a26ad62a5c155af9199af9e69b889claireho
191027f654740f2a26ad62a5c155af9199af9e69b889claireho
191159d709d503bab6e2b61931737e662dd293b40578ccornelius
191259d709d503bab6e2b61931737e662dd293b40578ccornelius// UInitOnce instantiation function for CanonIterData
191359d709d503bab6e2b61931737e662dd293b40578ccornelius
191459d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic void U_CALLCONV
191559d709d503bab6e2b61931737e662dd293b40578ccorneliusinitCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
191659d709d503bab6e2b61931737e662dd293b40578ccornelius    U_ASSERT(impl->fCanonIterData == NULL);
191759d709d503bab6e2b61931737e662dd293b40578ccornelius    impl->fCanonIterData = new CanonIterData(errorCode);
191859d709d503bab6e2b61931737e662dd293b40578ccornelius    if (impl->fCanonIterData == NULL) {
191927f654740f2a26ad62a5c155af9199af9e69b889claireho        errorCode=U_MEMORY_ALLOCATION_ERROR;
192027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
192159d709d503bab6e2b61931737e662dd293b40578ccornelius    if (U_SUCCESS(errorCode)) {
192259d709d503bab6e2b61931737e662dd293b40578ccornelius        utrie2_enum(impl->getNormTrie(), NULL, enumCIDRangeHandler, impl);
192359d709d503bab6e2b61931737e662dd293b40578ccornelius        utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode);
192459d709d503bab6e2b61931737e662dd293b40578ccornelius    }
192559d709d503bab6e2b61931737e662dd293b40578ccornelius    if (U_FAILURE(errorCode)) {
192659d709d503bab6e2b61931737e662dd293b40578ccornelius        delete impl->fCanonIterData;
192759d709d503bab6e2b61931737e662dd293b40578ccornelius        impl->fCanonIterData = NULL;
192827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
192927f654740f2a26ad62a5c155af9199af9e69b889claireho}
193027f654740f2a26ad62a5c155af9199af9e69b889claireho
193159d709d503bab6e2b61931737e662dd293b40578ccorneliusU_CDECL_END
193259d709d503bab6e2b61931737e662dd293b40578ccornelius
193327f654740f2a26ad62a5c155af9199af9e69b889clairehovoid Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
193427f654740f2a26ad62a5c155af9199af9e69b889claireho                                                  CanonIterData &newData,
193527f654740f2a26ad62a5c155af9199af9e69b889claireho                                                  UErrorCode &errorCode) const {
193627f654740f2a26ad62a5c155af9199af9e69b889claireho    if(norm16==0 || (minYesNo<=norm16 && norm16<minNoNo)) {
193727f654740f2a26ad62a5c155af9199af9e69b889claireho        // Inert, or 2-way mapping (including Hangul syllable).
193827f654740f2a26ad62a5c155af9199af9e69b889claireho        // We do not write a canonStartSet for any yesNo character.
193927f654740f2a26ad62a5c155af9199af9e69b889claireho        // Composites from 2-way mappings are added at runtime from the
194027f654740f2a26ad62a5c155af9199af9e69b889claireho        // starter's compositions list, and the other characters in
194127f654740f2a26ad62a5c155af9199af9e69b889claireho        // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
194227f654740f2a26ad62a5c155af9199af9e69b889claireho        // "maybe" characters.
194327f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
194427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
194527f654740f2a26ad62a5c155af9199af9e69b889claireho    for(UChar32 c=start; c<=end; ++c) {
194627f654740f2a26ad62a5c155af9199af9e69b889claireho        uint32_t oldValue=utrie2_get32(newData.trie, c);
194727f654740f2a26ad62a5c155af9199af9e69b889claireho        uint32_t newValue=oldValue;
194827f654740f2a26ad62a5c155af9199af9e69b889claireho        if(norm16>=minMaybeYes) {
194927f654740f2a26ad62a5c155af9199af9e69b889claireho            // not a segment starter if it occurs in a decomposition or has cc!=0
195027f654740f2a26ad62a5c155af9199af9e69b889claireho            newValue|=CANON_NOT_SEGMENT_STARTER;
195127f654740f2a26ad62a5c155af9199af9e69b889claireho            if(norm16<MIN_NORMAL_MAYBE_YES) {
195227f654740f2a26ad62a5c155af9199af9e69b889claireho                newValue|=CANON_HAS_COMPOSITIONS;
195327f654740f2a26ad62a5c155af9199af9e69b889claireho            }
195427f654740f2a26ad62a5c155af9199af9e69b889claireho        } else if(norm16<minYesNo) {
195527f654740f2a26ad62a5c155af9199af9e69b889claireho            newValue|=CANON_HAS_COMPOSITIONS;
195627f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
195727f654740f2a26ad62a5c155af9199af9e69b889claireho            // c has a one-way decomposition
195827f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar32 c2=c;
195927f654740f2a26ad62a5c155af9199af9e69b889claireho            uint16_t norm16_2=norm16;
196027f654740f2a26ad62a5c155af9199af9e69b889claireho            while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) {
196127f654740f2a26ad62a5c155af9199af9e69b889claireho                c2=mapAlgorithmic(c2, norm16_2);
196227f654740f2a26ad62a5c155af9199af9e69b889claireho                norm16_2=getNorm16(c2);
196327f654740f2a26ad62a5c155af9199af9e69b889claireho            }
196427f654740f2a26ad62a5c155af9199af9e69b889claireho            if(minYesNo<=norm16_2 && norm16_2<limitNoNo) {
196527f654740f2a26ad62a5c155af9199af9e69b889claireho                // c decomposes, get everything from the variable-length extra data
196627f654740f2a26ad62a5c155af9199af9e69b889claireho                const uint16_t *mapping=getMapping(norm16_2);
1967103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                uint16_t firstUnit=*mapping;
196827f654740f2a26ad62a5c155af9199af9e69b889claireho                int32_t length=firstUnit&MAPPING_LENGTH_MASK;
196927f654740f2a26ad62a5c155af9199af9e69b889claireho                if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
1970103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if(c==c2 && (*(mapping-1)&0xff)!=0) {
197127f654740f2a26ad62a5c155af9199af9e69b889claireho                        newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
197227f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
197327f654740f2a26ad62a5c155af9199af9e69b889claireho                }
197427f654740f2a26ad62a5c155af9199af9e69b889claireho                // Skip empty mappings (no characters in the decomposition).
197527f654740f2a26ad62a5c155af9199af9e69b889claireho                if(length!=0) {
1976103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    ++mapping;  // skip over the firstUnit
197727f654740f2a26ad62a5c155af9199af9e69b889claireho                    // add c to first code point's start set
197827f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t i=0;
197927f654740f2a26ad62a5c155af9199af9e69b889claireho                    U16_NEXT_UNSAFE(mapping, i, c2);
198027f654740f2a26ad62a5c155af9199af9e69b889claireho                    newData.addToStartSet(c, c2, errorCode);
198127f654740f2a26ad62a5c155af9199af9e69b889claireho                    // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
198227f654740f2a26ad62a5c155af9199af9e69b889claireho                    // one-way mapping. A 2-way mapping is possible here after
198327f654740f2a26ad62a5c155af9199af9e69b889claireho                    // intermediate algorithmic mapping.
198427f654740f2a26ad62a5c155af9199af9e69b889claireho                    if(norm16_2>=minNoNo) {
198527f654740f2a26ad62a5c155af9199af9e69b889claireho                        while(i<length) {
198627f654740f2a26ad62a5c155af9199af9e69b889claireho                            U16_NEXT_UNSAFE(mapping, i, c2);
198727f654740f2a26ad62a5c155af9199af9e69b889claireho                            uint32_t c2Value=utrie2_get32(newData.trie, c2);
198827f654740f2a26ad62a5c155af9199af9e69b889claireho                            if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
198927f654740f2a26ad62a5c155af9199af9e69b889claireho                                utrie2_set32(newData.trie, c2, c2Value|CANON_NOT_SEGMENT_STARTER,
199027f654740f2a26ad62a5c155af9199af9e69b889claireho                                             &errorCode);
199127f654740f2a26ad62a5c155af9199af9e69b889claireho                            }
199227f654740f2a26ad62a5c155af9199af9e69b889claireho                        }
199327f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
199427f654740f2a26ad62a5c155af9199af9e69b889claireho                }
199527f654740f2a26ad62a5c155af9199af9e69b889claireho            } else {
199627f654740f2a26ad62a5c155af9199af9e69b889claireho                // c decomposed to c2 algorithmically; c has cc==0
199727f654740f2a26ad62a5c155af9199af9e69b889claireho                newData.addToStartSet(c, c2, errorCode);
199827f654740f2a26ad62a5c155af9199af9e69b889claireho            }
199927f654740f2a26ad62a5c155af9199af9e69b889claireho        }
200027f654740f2a26ad62a5c155af9199af9e69b889claireho        if(newValue!=oldValue) {
200127f654740f2a26ad62a5c155af9199af9e69b889claireho            utrie2_set32(newData.trie, c, newValue, &errorCode);
200227f654740f2a26ad62a5c155af9199af9e69b889claireho        }
200327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
200427f654740f2a26ad62a5c155af9199af9e69b889claireho}
200527f654740f2a26ad62a5c155af9199af9e69b889claireho
200627f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
200727f654740f2a26ad62a5c155af9199af9e69b889claireho    // Logically const: Synchronized instantiation.
200827f654740f2a26ad62a5c155af9199af9e69b889claireho    Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this);
200959d709d503bab6e2b61931737e662dd293b40578ccornelius    umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode);
201027f654740f2a26ad62a5c155af9199af9e69b889claireho    return U_SUCCESS(errorCode);
201127f654740f2a26ad62a5c155af9199af9e69b889claireho}
201227f654740f2a26ad62a5c155af9199af9e69b889claireho
201327f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
201459d709d503bab6e2b61931737e662dd293b40578ccornelius    return (int32_t)utrie2_get32(fCanonIterData->trie, c);
201527f654740f2a26ad62a5c155af9199af9e69b889claireho}
201627f654740f2a26ad62a5c155af9199af9e69b889claireho
201727f654740f2a26ad62a5c155af9199af9e69b889clairehoconst UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
201859d709d503bab6e2b61931737e662dd293b40578ccornelius    return *(const UnicodeSet *)fCanonIterData->canonStartSets[n];
201927f654740f2a26ad62a5c155af9199af9e69b889claireho}
202027f654740f2a26ad62a5c155af9199af9e69b889claireho
202127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const {
202227f654740f2a26ad62a5c155af9199af9e69b889claireho    return getCanonValue(c)>=0;
202327f654740f2a26ad62a5c155af9199af9e69b889claireho}
202427f654740f2a26ad62a5c155af9199af9e69b889claireho
202527f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
202627f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER;
202727f654740f2a26ad62a5c155af9199af9e69b889claireho    if(canonValue==0) {
202827f654740f2a26ad62a5c155af9199af9e69b889claireho        return FALSE;
202927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
203027f654740f2a26ad62a5c155af9199af9e69b889claireho    set.clear();
203127f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t value=canonValue&CANON_VALUE_MASK;
203227f654740f2a26ad62a5c155af9199af9e69b889claireho    if((canonValue&CANON_HAS_SET)!=0) {
203327f654740f2a26ad62a5c155af9199af9e69b889claireho        set.addAll(getCanonStartSet(value));
203427f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if(value!=0) {
203527f654740f2a26ad62a5c155af9199af9e69b889claireho        set.add(value);
203627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
203727f654740f2a26ad62a5c155af9199af9e69b889claireho    if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
203827f654740f2a26ad62a5c155af9199af9e69b889claireho        uint16_t norm16=getNorm16(c);
203927f654740f2a26ad62a5c155af9199af9e69b889claireho        if(norm16==JAMO_L) {
204027f654740f2a26ad62a5c155af9199af9e69b889claireho            UChar32 syllable=
204127f654740f2a26ad62a5c155af9199af9e69b889claireho                (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
204227f654740f2a26ad62a5c155af9199af9e69b889claireho            set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);
204327f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
204427f654740f2a26ad62a5c155af9199af9e69b889claireho            addComposites(getCompositionsList(norm16), set);
204527f654740f2a26ad62a5c155af9199af9e69b889claireho        }
204627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
204727f654740f2a26ad62a5c155af9199af9e69b889claireho    return TRUE;
204827f654740f2a26ad62a5c155af9199af9e69b889claireho}
204927f654740f2a26ad62a5c155af9199af9e69b889claireho
205050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END
205150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
205250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Normalizer2 data swapping ----------------------------------------------- ***
205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
205450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_USE
205550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
205650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI int32_t U_EXPORT2
205750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_swap(const UDataSwapper *ds,
205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const void *inData, int32_t length, void *outData,
205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode *pErrorCode) {
206050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UDataInfo *pInfo;
206150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t headerSize;
206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint8_t *inBytes;
206450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t *outBytes;
206550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
206650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const int32_t *inIndexes;
206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t indexes[Normalizer2Impl::IX_MIN_MAYBE_YES+1];
206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i, offset, nextOffset, size;
207050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
207150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* udata_swapDataHeader checks the arguments */
207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
207650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
207750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* check data format and format version */
207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pInfo=(const UDataInfo *)((const char *)inData+4);
207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!(
208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Nrm2" */
208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[1]==0x72 &&
208250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[2]==0x6d &&
208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        pInfo->dataFormat[3]==0x32 &&
2084103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        (pInfo->formatVersion[0]==1 || pInfo->formatVersion[0]==2)
208550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    )) {
208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
208750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         pInfo->dataFormat[0], pInfo->dataFormat[1],
208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         pInfo->dataFormat[2], pInfo->dataFormat[3],
208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         pInfo->formatVersion[0]);
209050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pErrorCode=U_UNSUPPORTED_ERROR;
209150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
209350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
209450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    inBytes=(const uint8_t *)inData+headerSize;
209550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    outBytes=(uint8_t *)outData+headerSize;
209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    inIndexes=(const int32_t *)inBytes;
209850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
209950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length>=0) {
210050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length-=headerSize;
210127f654740f2a26ad62a5c155af9199af9e69b889claireho        if(length<(int32_t)sizeof(indexes)) {
210250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
210350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             length);
210450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
210550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return 0;
210650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
210750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
210850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
210950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* read the first few indexes */
211050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(i=0; i<=Normalizer2Impl::IX_MIN_MAYBE_YES; ++i) {
211150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        indexes[i]=udata_readInt32(ds, inIndexes[i]);
211250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
211350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
211450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* get the total length of the data */
211550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    size=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
211650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
211750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(length>=0) {
211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(length<size) {
211950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n",
212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             length);
212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
212250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return 0;
212350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
212450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
212550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* copy the data for inaccessible bytes */
212650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(inBytes!=outBytes) {
212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uprv_memcpy(outBytes, inBytes, size);
212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
213050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        offset=0;
213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* swap the int32_t indexes[] */
213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET];
213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
213550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        offset=nextOffset;
213650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* swap the UTrie2 */
213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        offset=nextOffset;
214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* swap the uint16_t extraData[] */
2143103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET];
214450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        offset=nextOffset;
214650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2147103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        /* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */
2148103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];
2149103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        offset=nextOffset;
2150103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(offset==size);
215250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
215350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
215450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return headerSize+size;
215550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif  // !UCONFIG_NO_NORMALIZATION
2158