/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2impl.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/normalizer2.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/udata.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "mutex.h"
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "normalizer2impl.h"
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "putilimp.h"
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h"
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uset_imp.h"
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "utrie2.h"
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h"
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// ReorderingBuffer -------------------------------------------------------- ***
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) {
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t length=str.length();
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    start=str.getBuffer(destCapacity);
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(start==NULL) {
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // getBuffer() already did str.setToBogus()
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errorCode=U_MEMORY_ALLOCATION_ERROR;
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    limit=start+length;
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    remainingCapacity=str.getCapacity()-length;
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reorderStart=start;
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(start==limit) {
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lastCC=0;
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        setIterator();
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lastCC=previousCC();
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Set reorderStart after the last code point with cc<=1 if there is one.
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(lastCC>1) {
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            while(previousCC()>1) {}
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        reorderStart=codePointLimit;
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const {
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t length=(int32_t)(limit-start);
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length==(int32_t)(otherLimit-otherStart) &&
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        0==u_memcmp(start, otherStart, length);
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(remainingCapacity<2 && !resize(2, errorCode)) {
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(lastCC<=cc || cc==0) {
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit[0]=U16_LEAD(c);
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit[1]=U16_TRAIL(c);
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit+=2;
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lastCC=cc;
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(cc<=1) {
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            reorderStart=limit;
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        insert(c, cc);
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    remainingCapacity-=2;
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::append(const UChar *s, int32_t length,
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                               uint8_t leadCC, uint8_t trailCC,
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                               UErrorCode &errorCode) {
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length==0) {
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return TRUE;
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(remainingCapacity<length && !resize(length, errorCode)) {
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    remainingCapacity-=length;
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(lastCC<=leadCC || leadCC==0) {
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(trailCC<=1) {
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            reorderStart=limit+length;
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(leadCC<=1) {
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            reorderStart=limit+1;  // Ok if not a code point boundary.
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *sLimit=s+length;
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        do { *limit++=*s++; } while(s!=sLimit);
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lastCC=trailCC;
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t i=0;
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c;
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U16_NEXT(s, i, length, c);
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        insert(c, leadCC);  // insert first code point
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while(i<length) {
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_NEXT(s, i, length, c);
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(i<length) {
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // s must be in NFD, otherwise we need to use getCC().
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leadCC=trailCC;
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            append(c, leadCC, errorCode);
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) {
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t cpLength=U16_LENGTH(c);
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) {
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    remainingCapacity-=cpLength;
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(cpLength==1) {
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *limit++=(UChar)c;
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit[0]=U16_LEAD(c);
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit[1]=U16_TRAIL(c);
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit+=2;
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    lastCC=0;
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reorderStart=limit;
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode) {
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(s==sLimit) {
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return TRUE;
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t length=(int32_t)(sLimit-s);
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(remainingCapacity<length && !resize(length, errorCode)) {
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    u_memcpy(limit, s, length);
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    limit+=length;
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    remainingCapacity-=length;
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    lastCC=0;
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reorderStart=limit;
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid ReorderingBuffer::remove() {
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reorderStart=limit=start;
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    remainingCapacity=str.getCapacity();
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    lastCC=0;
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid ReorderingBuffer::removeSuffix(int32_t suffixLength) {
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(suffixLength<(limit-start)) {
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit-=suffixLength;
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        remainingCapacity+=suffixLength;
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit=start;
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        remainingCapacity=str.getCapacity();
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    lastCC=0;
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reorderStart=limit;
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) {
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t reorderStartIndex=(int32_t)(reorderStart-start);
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t length=(int32_t)(limit-start);
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    str.releaseBuffer(length);
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t newCapacity=length+appendLength;
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t doubleCapacity=2*str.getCapacity();
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(newCapacity<doubleCapacity) {
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        newCapacity=doubleCapacity;
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(newCapacity<256) {
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        newCapacity=256;
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    start=str.getBuffer(newCapacity);
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(start==NULL) {
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // getBuffer() already did str.setToBogus()
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errorCode=U_MEMORY_ALLOCATION_ERROR;
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reorderStart=start+reorderStartIndex;
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    limit=start+length;
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    remainingCapacity=str.getCapacity()-length;
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid ReorderingBuffer::skipPrevious() {
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    codePointLimit=codePointStart;
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c=*--codePointStart;
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) {
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        --codePointStart;
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguint8_t ReorderingBuffer::previousCC() {
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    codePointLimit=codePointStart;
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(reorderStart>=codePointStart) {
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c=*--codePointStart;
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(c<Normalizer2Impl::MIN_CCC_LCCC_CP) {
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c2;
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) {
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        --codePointStart;
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c=U16_GET_SUPPLEMENTARY(c2, c);
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Inserts c somewhere before the last character.
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Requires 0<cc<lastCC which implies reorderStart<limit.
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid ReorderingBuffer::insert(UChar32 c, uint8_t cc) {
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(setIterator(), skipPrevious(); previousCC()>cc;) {}
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // insert c at codePointLimit, after the character with prevCC<=cc
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *q=limit;
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *r=limit+=U16_LENGTH(c);
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    do {
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *--r=*--q;
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } while(codePointLimit!=q);
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    writeCodePoint(q, c);
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(cc<=1) {
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        reorderStart=r;
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Normalizer2Impl --------------------------------------------------------- ***
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct CanonIterData : public UMemory {
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CanonIterData(UErrorCode &errorCode);
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ~CanonIterData();
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UTrie2 *trie;
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVector canonStartSets;  // contains UnicodeSet *
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::~Normalizer2Impl() {
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    udata_close(memory);
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utrie2_close(normTrie);
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fCanonIterData;
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool U_CALLCONV
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::isAcceptable(void *context,
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              const char * /* type */, const char * /*name*/,
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                              const UDataInfo *pInfo) {
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->size>=20 &&
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->charsetFamily==U_CHARSET_FAMILY &&
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->dataFormat[1]==0x72 &&
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->dataFormat[2]==0x6d &&
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->dataFormat[3]==0x32 &&
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->formatVersion[0]==2
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ) {
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        Normalizer2Impl *me=(Normalizer2Impl *)context;
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return TRUE;
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const int32_t *inIndexes=(const int32_t *)inBytes;
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(indexesLength<=IX_MIN_MAYBE_YES) {
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    minYesNo=inIndexes[IX_MIN_YES_NO];
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    minNoNo=inIndexes[IX_MIN_NO_NO];
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    limitNoNo=inIndexes[IX_LIMIT_NO_NO];
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       inBytes+offset, nextOffset-offset, NULL,
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       &errorCode);
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    offset=nextOffset;
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    maybeYesCompositions=(const uint16_t *)(inBytes+offset);
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // smallFCD: new in formatVersion 2
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    offset=nextOffset;
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    smallFCD=inBytes+offset;
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Build tccc180[].
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t bits=0;
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(UChar c=0; c<0x180; bits>>=1) {
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if((c&0xff)==0) {
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            bits=smallFCD[c>>8];  // one byte per 0x100 code points
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(bits&1) {
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            for(int i=0; i<0x20; ++i, ++c) {
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tccc180[c]=(uint8_t)getFCD16FromNormData(c);
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uprv_memset(tccc180+c, 0, 0x20);
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c+=0x20;
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguint8_t Normalizer2Impl::getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const {
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c;
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(cpStart==(cpLimit-1)) {
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c=*cpStart;
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]);
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t prevNorm16=getNorm16(c);
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(prevNorm16<=minYesNo) {
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;  // yesYes and Hangul LV/LVT have ccc=tccc=0
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (uint8_t)(*getMapping(prevNorm16)>>8);  // tccc from yesNo
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgenumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* add the start code point to the USet */
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const USetAdder *sa=(const USetAdder *)context;
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sa->add(sa->set, start);
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic uint32_t U_CALLCONV
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgsegmentStarterMapper(const void * /*context*/, uint32_t value) {
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return value&CANON_NOT_SEGMENT_STARTER;
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* add the start code point of each same-value range of each trie */
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utrie2_enum(normTrie, NULL, enumPropertyStartsRange, sa);
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* add Hangul LV syllables and LV+1 because of skippables */
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        sa->add(sa->set, c);
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        sa->add(sa->set, c+1);
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* add the start code point of each same-value range of the canonical iterator data trie */
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(ensureCanonIterData(errorCode)) {
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // currently only used for the SEGMENT_STARTER property
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utrie2_enum(fCanonIterData->trie, segmentStarterMapper, enumPropertyStartsRange, sa);
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src,
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                UChar32 minNeedDataCP,
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                ReorderingBuffer *buffer,
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                UErrorCode &errorCode) const {
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Make some effort to support NUL-terminated strings reasonably.
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Take the part of the fast quick check loop that does not look up
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // data and check the first part of the string.
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // After this prefix, determine the string length to simplify the rest
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // of the code.
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevSrc=src;
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar c;
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while((c=*src++)<minNeedDataCP && c!=0) {}
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Back out the last character for full processing.
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Copy this prefix.
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(--src!=prevSrc) {
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(buffer!=NULL) {
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buffer->appendZeroCC(prevSrc, src, errorCode);
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return src;
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Dual functionality:
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// buffer!=NULL: normalize
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// buffer==NULL: isNormalized/spanQuickCheckYes
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::decompose(const UChar *src, const UChar *limit,
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           ReorderingBuffer *buffer,
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                           UErrorCode &errorCode) const {
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 minNoCP=minDecompNoCP;
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(limit==NULL) {
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(U_FAILURE(errorCode)) {
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return src;
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit=u_strchr(src, 0);
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevSrc;
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c=0;
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16=0;
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // only for quick check
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevBoundary=src;
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t prevCC=0;
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // count code units below the minimum or with irrelevant data for the quick check
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(prevSrc=src; src!=limit;) {
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if( (c=*src)<minNoCP ||
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++src;
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(!U16_IS_SURROGATE(c)) {
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar c2;
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(U16_IS_SURROGATE_LEAD(c)) {
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c=U16_GET_SUPPLEMENTARY(c, c2);
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else /* trail surrogate */ {
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        --src;
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c=U16_GET_SUPPLEMENTARY(c2, c);
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    src+=U16_LENGTH(c);
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // copy these code units all at once
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(src!=prevSrc) {
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(buffer!=NULL) {
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevCC=0;
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevBoundary=src;
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(src==limit) {
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Check one above-minimum, relevant code point.
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src+=U16_LENGTH(c);
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(buffer!=NULL) {
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(!decompose(c, norm16, *buffer, errorCode)) {
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(isDecompYes(norm16)) {
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uint8_t cc=getCCFromYesOrMaybe(norm16);
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(prevCC<=cc || cc==0) {
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    prevCC=cc;
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(cc<=1) {
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        prevBoundary=src;
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    continue;
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return prevBoundary;  // "no" or cc out of order
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return src;
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Decompose a short piece of text which is likely to contain characters that
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// fail the quick check loop and/or where the quick check loop's overhead
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// is unlikely to be amortized.
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Called by the compose() and makeFCD() implementations.
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                      ReorderingBuffer &buffer,
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                      UErrorCode &errorCode) const {
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(src<limit) {
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c;
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint16_t norm16;
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(!decompose(c, norm16, buffer, errorCode)) {
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                 ReorderingBuffer &buffer,
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                 UErrorCode &errorCode) const {
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Only loops for 1:1 algorithmic mappings.
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // get the decomposition and the lead and trail cc's
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(isDecompYes(norm16)) {
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c does not decompose
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode);
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isHangul(norm16)) {
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Hangul syllable: decompose algorithmically
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar jamos[3];
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode);
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isDecompNoAlgorithmic(norm16)) {
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=mapAlgorithmic(c, norm16);
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            norm16=getNorm16(c);
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c decomposes, get everything from the variable-length extra data
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const uint16_t *mapping=getMapping(norm16);
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint16_t firstUnit=*mapping;
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t length=firstUnit&MAPPING_LENGTH_MASK;
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint8_t leadCC, trailCC;
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            trailCC=(uint8_t)(firstUnit>>8);
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leadCC=(uint8_t)(*(mapping-1)>>8);
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                leadCC=0;
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return buffer.append((const UChar *)mapping+1, length, leadCC, trailCC, errorCode);
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const {
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *decomp=NULL;
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16;
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c does not decompose
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return decomp;
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isHangul(norm16)) {
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Hangul syllable: decompose algorithmically
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            length=Hangul::decompose(c, buffer);
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return buffer;
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isDecompNoAlgorithmic(norm16)) {
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=mapAlgorithmic(c, norm16);
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            decomp=buffer;
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            length=0;
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_APPEND_UNSAFE(buffer, length, c);
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c decomposes, get everything from the variable-length extra data
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const uint16_t *mapping=getMapping(norm16);
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            length=*mapping&MAPPING_LENGTH_MASK;
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return (const UChar *)mapping+1;
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// so that a raw mapping fits that consists of one unit ("rm0")
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// plus all but the first two code units of the normal mapping.
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK.
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const {
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // We do not loop in this method because an algorithmic mapping itself
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // becomes a final result rather than having to be decomposed recursively.
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16;
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // c does not decompose
6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return NULL;
6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(isHangul(norm16)) {
6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Hangul syllable: decompose algorithmically
6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        Hangul::getRawDecomposition(c, buffer);
6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length=2;
6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return buffer;
6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(isDecompNoAlgorithmic(norm16)) {
6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        c=mapAlgorithmic(c, norm16);
6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length=0;
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U16_APPEND_UNSAFE(buffer, length, c);
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return buffer;
6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // c decomposes, get everything from the variable-length extra data
6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const uint16_t *mapping=getMapping(norm16);
6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint16_t firstUnit=*mapping;
6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t mLength=firstUnit&MAPPING_LENGTH_MASK;  // length of normal mapping
6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(firstUnit&MAPPING_HAS_RAW_MAPPING) {
6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1;
6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint16_t rm0=*rawMapping;
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(rm0<=MAPPING_LENGTH_MASK) {
6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                length=rm0;
6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return (const UChar *)rawMapping-rm0;
6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Copy the normal mapping and replace its first two code units with rm0.
6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                buffer[0]=(UChar)rm0;
6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2);
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                length=mLength-1;
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return buffer;
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            length=mLength;
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return (const UChar *)mapping+1;
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UBool doDecompose,
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UnicodeString &safeMiddle,
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         ReorderingBuffer &buffer,
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UErrorCode &errorCode) const {
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buffer.copyReorderableSuffixTo(safeMiddle);
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(doDecompose) {
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        decompose(src, limit, &buffer, errorCode);
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Just merge the strings at the boundary.
6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ForwardUTrie2StringIterator iter(normTrie, src, limit);
6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t firstCC, prevCC, cc;
6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    firstCC=prevCC=cc=getCC(iter.next16());
6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(cc!=0) {
6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        prevCC=cc;
6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cc=getCC(iter.next16());
6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    };
6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit=u_strchr(iter.codePointStart, 0);
6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) {
6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        buffer.appendZeroCC(iter.codePointStart, limit, errorCode);
6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Note: hasDecompBoundary() could be implemented as aliases to
6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// at the cost of building the FCD trie for a decomposition normalizer.
6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::hasDecompBoundary(UChar32 c, UBool before) const {
6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(c<minDecompNoCP) {
6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return TRUE;
6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint16_t norm16=getNorm16(c);
6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return TRUE;
6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(norm16>MIN_NORMAL_MAYBE_YES) {
6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;  // ccc!=0
6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isDecompNoAlgorithmic(norm16)) {
6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=mapAlgorithmic(c, norm16);
6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c decomposes, get everything from the variable-length extra data
6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const uint16_t *mapping=getMapping(norm16);
6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint16_t firstUnit=*mapping;
6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(!before) {
6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // decomp after-boundary: same as hasFCDBoundaryAfter(),
6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // fcd16<=1 || trailCC==0
6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(firstUnit>0x1ff) {
6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;  // trailCC>1
6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(firstUnit<=0xff) {
6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return TRUE;  // trailCC==0
6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // if(trailCC==1) test leadCC==0, same as checking for before-boundary
6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // TRUE if leadCC==0 (hasFCDBoundaryBefore())
6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;
7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Finds the recomposition result for
7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a forward-combining "lead" character,
7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * specified with a pointer to its compositions list,
7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and a backward-combining "trail" character.
7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If the lead and trail characters combine, then this function returns
7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the following "compositeAndFwd" value:
7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bits 21..1  composite character
7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bit      0  set if the composite is a forward-combining starter
7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * otherwise it returns -1.
7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The compositions list has (trail, compositeAndFwd) pair entries,
7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * encoded as either pairs or triples of 16-bit units.
7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The last entry has the high bit of its first unit set.
7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The list is sorted by ascending trail characters (there are no duplicates).
7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * A linear search is used.
7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * See normalizer2impl.h for a more detailed description
7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the compositions list format.
7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t key1, firstUnit;
7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(trail<COMP_1_TRAIL_LIMIT) {
7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // trail character is 0..33FF
7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // result entry may have 2 or 3 units
7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        key1=(uint16_t)(trail<<1);
7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while(key1>(firstUnit=*list)) {
7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            list+=2+(firstUnit&COMP_1_TRIPLE);
7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(firstUnit&COMP_1_TRIPLE) {
7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return ((int32_t)list[1]<<16)|list[2];
7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return list[1];
7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // trail character is 3400..10FFFF
7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // result entry has 3 units
7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (((trail>>COMP_1_TRAIL_SHIFT))&
7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                          ~COMP_1_TRIPLE));
7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint16_t secondUnit;
7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(;;) {
7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(key1>(firstUnit=*list)) {
7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                list+=2+(firstUnit&COMP_1_TRIPLE);
7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(key2>(secondUnit=list[1])) {
7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(firstUnit&COMP_1_LAST_TUPLE) {
7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        list+=3;
7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2];
7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return -1;
7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param list some character's compositions list
7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  * @param set recursively receives the composites from these compositions
7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  */
7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const {
7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t firstUnit;
7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t compositeAndFwd;
7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    do {
7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        firstUnit=*list;
7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if((firstUnit&COMP_1_TRIPLE)==0) {
7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            compositeAndFwd=list[1];
7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            list+=2;
7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2];
7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            list+=3;
7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 composite=compositeAndFwd>>1;
7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if((compositeAndFwd&1)!=0) {
7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set.add(composite);
7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } while((firstUnit&COMP_1_LAST_TUPLE)==0);
7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Recomposes the buffer text starting at recomposeStartIndex
7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (which is in NFD - decomposed and canonically ordered),
8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and truncates the buffer contents.
8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note that recomposition never lengthens the text:
8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Any character consists of either one or two code units;
8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a composition may contain at most one more code unit than the original starter,
8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * while the combining mark that is removed has at least one code unit.
8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
// Parameters, as used below:
//   buffer              - text is edited in place between buffer.getStart() and
//                         buffer.getLimit(); the new end is reported back via
//                         buffer.setReorderingLimit().
//   recomposeStartIndex - offset from buffer.getStart() at which processing begins.
//   onlyContiguous      - if TRUE, a non-starter that does not combine discards the
//                         current starter's compositions list (see the FCC note below).
8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                UBool onlyContiguous) const {
8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *p=buffer.getStart()+recomposeStartIndex;
8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *limit=buffer.getLimit();
    // Nothing to recompose; the reordering limit is left untouched in this case.
8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(p==limit) {
8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
    // starter:  start of the most recent forward-combining starter (1 or 2 code units).
    // pRemove:  start of the code unit(s) to delete after a successful composition.
    // q, r:     destination/source cursors for the in-place copy loops.
8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *starter, *pRemove, *q, *r;
8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint16_t *compositionsList;
8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c, compositeAndFwd;
8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16;
8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t cc, prevCC;
8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool starterIsSupplementary;
8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Some of the following variables are not used until we have a forward-combining starter
8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // and are only initialized now to avoid compiler warnings.
8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    compositionsList=NULL;  // used as indicator for whether we have a forward-combining starter
8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    starter=NULL;
8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    starterIsSupplementary=FALSE;
8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    prevCC=0;
8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
    // One iteration per code point. Every exit from the loop is a "p==limit" check,
    // so the loop ends exactly when the (possibly shortened) text is exhausted.
8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
        // Fetch the next code point c with its norm16 value; afterwards p is the
        // limit of c (its start is p-U16_LENGTH(c), as used further down).
8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cc=getCCFromYesOrMaybe(norm16);
8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if( // this character combines backward and
8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isMaybe(norm16) &&
8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // we have seen a starter that combines forward and
8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            compositionsList!=NULL &&
8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // the backward-combining character is not blocked
8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (prevCC<cc || prevCC==0)
8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ) {
8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(isJamoVT(norm16)) {
8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // c is a Jamo V/T, see if we can compose it with the previous character.
8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(c<Hangul::JAMO_T_BASE) {
8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                    // starter is valid here: compositionsList only becomes non-NULL
                    // after starter has been set (see the bottom of the loop).
                    // The cast to UChar makes values below JAMO_L_BASE wrap to large
                    // numbers, so the single "<" test below is a range check.
8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE);
8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(prev<Hangul::JAMO_L_COUNT) {
                        // The Jamo V is treated as the single code unit just before p.
8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        pRemove=p-1;
8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UChar syllable=(UChar)
8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            (Hangul::HANGUL_BASE+
8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             Hangul::JAMO_T_COUNT);
8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UChar t;
                        // Peek at the following code unit; consume it too if it is a Jamo T.
8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            ++p;
8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            syllable+=t;  // The next character was a Jamo T.
8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
                        // The Jamo L is overwritten by the composed syllable.
8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *starter=syllable;
8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // remove the Jamo V/T
                        // Close the gap [pRemove, p) by copying the tail of the text down,
                        // then shrink limit and resume reading at the former gap position.
8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        q=pRemove;
8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        r=p;
8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        while(r<limit) {
8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            *q++=*r++;
8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        limit=q;
8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        p=pRemove;
8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /*
8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * No "else" for Jamo T:
8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * Since the input is in NFD, there are no Hangul LV syllables that
8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * a Jamo T could combine with.
8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * All Jamo Ts are combined above when handling Jamo Vs.
8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 */
8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(p==limit) {
8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
                // Whether or not a syllable was formed, the remembered starter is
                // dropped; note that prevCC is not updated on this path.
8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                compositionsList=NULL;
8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                continue;
8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if((compositeAndFwd=combine(compositionsList, c))>=0) {
8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // The starter and the combining mark (c) do combine.
                // compositeAndFwd packs the composite in the bits above bit 0;
                // bit 0 is tested separately below.
8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 composite=compositeAndFwd>>1;
8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Replace the starter with the composite, remove the combining mark.
8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                pRemove=p-U16_LENGTH(c);  // pRemove & p: start & limit of the combining mark
                // Four cases, depending on whether the old starter and the new
                // composite each need one or two code units.
8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(starterIsSupplementary) {
8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(U_IS_SUPPLEMENTARY(composite)) {
8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // both are supplementary
8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        starter[0]=U16_LEAD(composite);
8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        starter[1]=U16_TRAIL(composite);
8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *starter=(UChar)composite;
8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // The composite is shorter than the starter,
8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        // move the intermediate characters forward one.
8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        starterIsSupplementary=FALSE;
8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        q=starter+1;
8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        r=q+1;
8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        while(r<pRemove) {
8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            *q++=*r++;
8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
                        // The freed code unit now sits directly before the combining
                        // mark, so the span to delete grows by one at its front.
8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        --pRemove;
8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else if(U_IS_SUPPLEMENTARY(composite)) {
9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // The composite is longer than the starter,
9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // move the intermediate characters back one.
9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    starterIsSupplementary=TRUE;
9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++starter;  // temporarily increment for the loop boundary
                    // Copy backwards (highest address first) so the overlapping
                    // source and destination ranges do not clobber each other.
                    // The extra unit is taken from the front of the combining mark,
                    // hence pRemove advances by one.
9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    q=pRemove;
9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    r=++pRemove;
9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    while(starter<q) {
9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *--r=*--q;
9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *starter=U16_TRAIL(composite);
9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *--starter=U16_LEAD(composite);  // undo the temporary increment
9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // both are on the BMP
9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *starter=(UChar)composite;
9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* remove the combining mark by moving the following text over it */
                // pRemove==p happens when a one-unit combining mark's slot was reused
                // for the composite's extra code unit above; then there is nothing
                // left to delete and the text length is unchanged.
9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(pRemove<p) {
9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    q=pRemove;
9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    r=p;
9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    while(r<limit) {
9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *q++=*r++;
9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    limit=q;
9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    p=pRemove;
9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Keep prevCC because we removed the combining mark.
9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(p==limit) {
9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Is the composite a starter that combines forward?
9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(compositeAndFwd&1) {
9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    compositionsList=
9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        getCompositionsListForComposite(getNorm16(composite));
9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    compositionsList=NULL;
9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // We combined; continue with looking for compositions.
9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                continue;
9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // no combination this time
        // c stays in the text, so its combining class is what the next
        // character's "blocked" test is compared against.
9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        prevCC=cc;
9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(p==limit) {
9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // If c did not combine, then check if it is a starter.
9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(cc==0) {
9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Found a new starter.
            // The assignment also clears compositionsList when this starter has no
            // compositions list, which forgets the previous starter.
9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) {
9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // It may combine with something, prepare for it.
                // Remember where c starts: one code unit before p for a BMP
                // character, two for a supplementary one.
9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(U_IS_BMP(c)) {
9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    starterIsSupplementary=FALSE;
9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    starter=p-1;
9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    starterIsSupplementary=TRUE;
9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    starter=p-2;
9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(onlyContiguous) {
9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // FCC: no discontiguous compositions; any intervening character blocks.
9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            compositionsList=NULL;
9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
    // Publish the new end of the text to the buffer.
9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buffer.setReorderingLimit(limit);
9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32
9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::composePair(UChar32 a, UChar32 b) const {
9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16=0
9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint16_t *list;
9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(isInert(norm16)) {
9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return U_SENTINEL;
9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(norm16<minYesNoMappingsOnly) {
9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(isJamoL(norm16)) {
9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            b-=Hangul::JAMO_V_BASE;
9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(0<=b && b<Hangul::JAMO_V_COUNT) {
9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return
9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    (Hangul::HANGUL_BASE+
9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)*
9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     Hangul::JAMO_T_COUNT);
9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return U_SENTINEL;
9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isHangul(norm16)) {
9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            b-=Hangul::JAMO_T_BASE;
9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(Hangul::isHangulWithoutJamoT(a) && 0<b && b<Hangul::JAMO_T_COUNT) {  // not b==0!
9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return a+b;
9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return U_SENTINEL;
9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // 'a' has a compositions list in extraData
9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            list=extraData+norm16;
9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(norm16>minYesNo) {  // composite 'a' has both mapping & compositions list
10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                list+=  // mapping pointer
10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    1+  // +1 to skip the first unit with the mapping lenth
10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    (*list&MAPPING_LENGTH_MASK);  // + mapping length
10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) {
10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return U_SENTINEL;
10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        list=maybeYesCompositions+norm16-minMaybeYes;
10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(b<0 || 0x10ffff<b) {  // combine(list, b) requires a valid code point b
10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return U_SENTINEL;
10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return combine(list, b)>>1;
10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else
10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t compositeAndFwd=combine(list, b);
10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL;
10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// doCompose: normalize
10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// !doCompose: isNormalized (buffer must be empty and initialized)
//
// Parameters:
//   src, limit      input UTF-16 text [src..limit[; if limit==NULL the text is NUL-terminated
//                   and limit is located with u_strchr(src, 0) once the low prefix has been handled.
//   onlyContiguous  enables the extra "FCC" trailing-ccc test below and is forwarded to recompose().
//   doCompose       TRUE: write the composed text into buffer;
//                   FALSE: only test the input and return FALSE at the first place that would change.
//   buffer          destination when doCompose; otherwise scratch space for
//                   decomposeShort()/recompose() on the slow path.
//   errorCode       passed to the helpers that write to buffer; the main loop stops when one fails.
//
// Returns FALSE if errorCode is a failure after the NUL-terminated prefix was handled, or if
// !doCompose and the input is found to need a change. Returns TRUE in every other case,
// including when the main loop is left early because an append/decompose helper returned FALSE;
// the return value alone therefore does not signal such a failure.
10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool
10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::compose(const UChar *src, const UChar *limit,
10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UBool onlyContiguous,
10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UBool doCompose,
10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         ReorderingBuffer &buffer,
10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UErrorCode &errorCode) const {
10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * prevBoundary points to the last character before the current one
10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * that has a composition boundary before it with ccc==0 and quick check "yes".
10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Keeping track of prevBoundary saves us looking for a composition boundary
10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * when we find a "no" or "maybe".
10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * When we back out from prevSrc back to prevBoundary,
10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * then we also remove those same characters (which had been simply copied
10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * or canonically-order-inserted) from the ReorderingBuffer.
10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Therefore, at all times, the [prevBoundary..prevSrc[ source units
10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * must correspond 1:1 to destination units at the end of the destination buffer.
10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevBoundary=src;
    // Code units below this threshold are accepted by the fast loop without a trie lookup.
10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 minNoMaybeCP=minCompNoMaybeCP;
10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(limit==NULL) {
        // NUL-terminated input: let the helper deal with the leading run first
        // (presumably the code units below minNoMaybeCP; it only receives the buffer when composing),
        // then find the real limit so that the main loop can always compare against it.
10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                           doCompose ? &buffer : NULL,
10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                           errorCode);
10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(U_FAILURE(errorCode)) {
10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(prevBoundary<src) {
10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Set prevBoundary to the last character in the prefix.
10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevBoundary=src-1;
10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit=u_strchr(src, 0);
10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
    // prevSrc: start of the current fast-loop run, later the start of the current character c.
10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevSrc;
10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c=0;
10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16=0;
10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // only for isNormalized
    // (stands in for buffer.getLastCC() when nothing is written to the buffer)
10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t prevCC=0;
10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
    // Each iteration: (1) skip/copy a run of "yes && ccc==0" text, then (2) handle the one
    // character c that stopped the run via the Jamo path, the ccc!=0 path, or the slow
    // decompose-and-recompose path.
10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // count code units below the minimum or with irrelevant data for the quick check
10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(prevSrc=src; src!=limit;) {
10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if( (c=*src)<minNoMaybeCP ||
10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++src;
10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(!U16_IS_SURROGATE(c)) {
                // A BMP character that is not "yes && ccc==0": leave the fast loop with c/norm16 set.
10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
                // Surrogate code unit: try to assemble the supplementary code point and
                // look up its own norm16; an unpaired surrogate is looked up as is.
10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar c2;
10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(U16_IS_SURROGATE_LEAD(c)) {
10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c=U16_GET_SUPPLEMENTARY(c, c2);
10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else /* trail surrogate */ {
                    // The preceding lead surrogate was already stepped over inside this run;
                    // move src back so that it points to the start of the full code point.
10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        --src;
10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c=U16_GET_SUPPLEMENTARY(c2, c);
10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    src+=U16_LENGTH(c);
10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // copy these code units all at once
10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(src!=prevSrc) {
10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(doCompose) {
10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(!buffer.appendZeroCC(prevSrc, src, errorCode)) {
                    // Leaves the outer loop; the function then returns TRUE (see header comment).
10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
                // The run consisted only of ccc==0 characters.
11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevCC=0;
11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(src==limit) {
11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Set prevBoundary to the last character in the quick check loop.
11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevBoundary=src-1;
            // If that last character is a surrogate pair, point at its lead surrogate.
11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_IS_LEAD(*(prevBoundary-1))
11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                --prevBoundary;
11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The start of the current character (c).
11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevSrc=src;
11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(src==limit) {
            // Empty run at the end of the input: nothing left to do.
11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
        // Step over the current character c (one or two code units); prevSrc still marks its start.
11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src+=U16_LENGTH(c);
11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * or has ccc!=0.
11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Check for Jamo V/T, then for regular characters.
11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * c is not a Hangul syllable or Jamo L because those have "yes" properties.
11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
        // prevBoundary!=prevSrc means there is a code unit in [prevBoundary..prevSrc[
        // right before c that may be inspected as a composition partner.
11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar prev=*(prevSrc-1);
11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UBool needToDecompose=FALSE;
11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(c<Hangul::JAMO_T_BASE) {
11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                // Subtract-then-compare range check: after the subtraction a single "<" test
                // decides whether prev was a Jamo L (assumes UChar is an unsigned type, so that
                // smaller values wrap around to large ones).
11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prev=(UChar)(prev-Hangul::JAMO_L_BASE);
11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(prev<Hangul::JAMO_L_COUNT) {
11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(!doCompose) {
                        // isNormalized mode: L+V would compose, so the input is not normalized.
11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return FALSE;
11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
                    // LV syllable for Jamo L index prev and the vowel index of c.
11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar syllable=(UChar)
11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        (Hangul::HANGUL_BASE+
11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         Hangul::JAMO_T_COUNT);
11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar t;
                    // Same subtract-then-compare range check for a following Jamo T.
11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(src!=limit && (t=(UChar)(*src-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ++src;
11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        syllable+=t;  // The next character was a Jamo T.
                        // L+V+T are all consumed: the syllable takes the place of the last
                        // buffer unit (the previously copied L, presumably what setLastChar()
                        // overwrites) and a new boundary starts after the T.
11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        prevBoundary=src;
11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        buffer.setLastChar(syllable);
11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        continue;
11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // If we see L+V+x where x!=T then we drop to the slow path,
11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // decompose and recompose.
11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // This is to deal with NFKC finding normal L and V but a
11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // compatibility variant of a T. We need to either fully compose that
11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // combination here (which would complicate the code and may not work
11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // with strange custom data) or use the slow path -- or else our replacing
11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // two input characters (L+V) with one output character (LV syllable)
11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // would violate the invariant that [prevBoundary..prevSrc[ has the same
11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // length as what we appended to the buffer since prevBoundary.
11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    needToDecompose=TRUE;
11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(Hangul::isHangulWithoutJamoT(prev)) {
11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // c is a Jamo Trailing consonant,
11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // compose with previous Hangul LV that does not contain a Jamo T.
11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(!doCompose) {
11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
                // LV syllable plus the T index of c gives the LVT syllable; one buffer unit
                // (the LV) now stands for two source units, so a new boundary starts after c.
11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                buffer.setLastChar((UChar)(prev+c-Hangul::JAMO_T_BASE));
11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevBoundary=src;
11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                continue;
11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(!needToDecompose) {
11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // The Jamo V/T did not compose into a Hangul syllable.
                // Keep it unchanged; it is appended with ccc==0.
11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(doCompose) {
11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(!buffer.appendBMP((UChar)c, 0, errorCode)) {
11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    prevCC=0;
11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                continue;
11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
            // needToDecompose: fall through to the generic handling below.
11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Source buffer pointers:
11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *  all done      quick check   current char  not yet
11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *                "yes" but     (c)           processed
11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *                may combine
11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *                forward
11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * [-------------[-------------[-------------[-------------[
11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * |             |             |             |             |
11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * orig. src     prevBoundary  prevSrc       src           limit
11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Destination buffer pointers inside the ReorderingBuffer:
11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *  all done      might take    not filled yet
11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *                characters for
11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *                reordering
11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * [-------------[-------------[-------------[
11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * |             |             |             |
12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * start         reorderStart  limit         |
12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *                             +remainingCap.+
12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(norm16>=MIN_YES_YES_WITH_CC) {
            // In this range the low byte of norm16 is used as the character's ccc.
12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint8_t cc=(uint8_t)norm16;  // cc!=0
12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if( onlyContiguous &&  // FCC
12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (doCompose ? buffer.getLastCC() : prevCC)==0 &&
12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevBoundary<prevSrc &&
12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // passed the quick check "yes && ccc==0" test.
12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Check whether the last character was a "yesYes" or a "yesNo".
12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If a "yesNo", then we get its trailing ccc from its
12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // mapping and check for canonical order.
12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // All other cases are ok.
12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fails FCD test, need to decompose and contiguously recompose.
                // When composing, control falls out of this if/else chain into the slow path below.
12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(!doCompose) {
12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return FALSE;
12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(doCompose) {
                // Composing: hand c and its ccc to the buffer and move on.
12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(!buffer.append(c, cc, errorCode)) {
12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                continue;
12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(prevCC<=cc) {
                // isNormalized mode: the ccc values are in non-decreasing order so far.
12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevCC=cc;
12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                continue;
12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
                // isNormalized mode: c has a lower ccc than the preceding character.
12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
            // isNormalized mode: c is neither a "maybe" nor has ccc!=0; by the comment
            // further up that leaves a "noNo" (a character with a mapping), so the text would change.
12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Find appropriate boundaries around this character,
12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * decompose the source text from between the boundaries,
12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * and recompose it.
12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * We may need to remove the last few characters from the ReorderingBuffer
12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * to account for source text that was copied or appended
12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * but needs to take part in the recomposition.
12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Find the last composition boundary in [prevBoundary..src[.
12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * It is either the decomposition of the current character (at prevSrc),
12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * or prevBoundary.
12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(hasCompBoundaryBefore(c, norm16)) {
            // c starts a new segment; what was copied before it stays in the buffer.
12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevBoundary=prevSrc;
12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(doCompose) {
            // Take the units for [prevBoundary..prevSrc[ back out of the buffer (1:1 with the
            // source, see the invariant at the top); decomposeShort() below processes them again.
12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buffer.removeSuffix((int32_t)(prevSrc-prevBoundary));
12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Find the next composition boundary in [src..limit[ -
12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // modifies src to point to the next starter.
        // NOTE(review): src is a const UChar *, so the (UChar *) cast looks unnecessary -- confirm
        // against the declaration of findNextCompBoundary().
12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src=(UChar *)findNextCompBoundary(src, limit);
12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
        // Remember where this segment starts in the buffer so that only it is recomposed.
12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t recomposeStartIndex=buffer.length();
12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(!decomposeShort(prevBoundary, src, buffer, errorCode)) {
12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        recompose(buffer, recomposeStartIndex, onlyContiguous);
12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(!doCompose) {
            // isNormalized mode: the buffer was only scratch space. The input is normalized here
            // only if the recomposed segment matches the source segment; then the scratch
            // buffer is cleared for the next segment.
12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(!buffer.equals(prevBoundary, src)) {
12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buffer.remove();
12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevCC=0;
12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Move to the next starter. We never need to look back before this point again.
12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        prevBoundary=src;
12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
    // Reached via break: end of input, or a buffer/decompose helper returned FALSE.
12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Very similar to compose(): Make the same changes in both places if relevant.
12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// pQCResult==NULL: spanQuickCheckYes
12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES)
12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *
12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                   UBool onlyContiguous,
12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                   UNormalizationCheckResult *pQCResult) const {
12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * prevBoundary points to the last character before the current one
12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * that has a composition boundary before it with ccc==0 and quick check "yes".
12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevBoundary=src;
12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 minNoMaybeCP=minCompNoMaybeCP;
12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(limit==NULL) {
12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode errorCode=U_ZERO_ERROR;
12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode);
12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(prevBoundary<src) {
12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Set prevBoundary to the last character in the prefix.
12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevBoundary=src-1;
13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit=u_strchr(src, 0);
13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevSrc;
13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c=0;
13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16=0;
13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t prevCC=0;
13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // count code units below the minimum or with irrelevant data for the quick check
13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(prevSrc=src;;) {
13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(src==limit) {
13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return src;
13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if( (c=*src)<minNoMaybeCP ||
13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++src;
13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(!U16_IS_SURROGATE(c)) {
13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar c2;
13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(U16_IS_SURROGATE_LEAD(c)) {
13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c=U16_GET_SUPPLEMENTARY(c, c2);
13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else /* trail surrogate */ {
13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        --src;
13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c=U16_GET_SUPPLEMENTARY(c2, c);
13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    src+=U16_LENGTH(c);
13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(src!=prevSrc) {
13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Set prevBoundary to the last character in the quick check loop.
13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevBoundary=src-1;
13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if( U16_IS_TRAIL(*prevBoundary) && prevSrc<prevBoundary &&
13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                U16_IS_LEAD(*(prevBoundary-1))
13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                --prevBoundary;
13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevCC=0;
13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The start of the current character (c).
13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevSrc=src;
13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src+=U16_LENGTH(c);
13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * or has ccc!=0.
13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(isMaybeOrNonZeroCC(norm16)) {
13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint8_t cc=getCCFromYesOrMaybe(norm16);
13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if( onlyContiguous &&  // FCC
13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                cc!=0 &&
13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevCC==0 &&
13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevBoundary<prevSrc &&
13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // prevCC==0 && prevBoundary<prevSrc tell us that
13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // passed the quick check "yes && ccc==0" test.
13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Check whether the last character was a "yesYes" or a "yesNo".
13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // If a "yesNo", then we get its trailing ccc from its
13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // mapping and check for canonical order.
13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // All other cases are ok.
13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                getTrailCCFromCompYesAndZeroCC(prevBoundary, prevSrc)>cc
13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fails FCD test.
13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(prevCC<=cc || cc==0) {
13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevCC=cc;
13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(norm16<MIN_YES_YES_WITH_CC) {
13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(pQCResult!=NULL) {
13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *pQCResult=UNORM_MAYBE;
13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return prevBoundary;
13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                continue;
13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(pQCResult!=NULL) {
13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *pQCResult=UNORM_NO;
13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return prevBoundary;
13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit,
13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       UBool doCompose,
13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       UBool onlyContiguous,
13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       UnicodeString &safeMiddle,
13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       ReorderingBuffer &buffer,
13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       UErrorCode &errorCode) const {
14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(!buffer.isEmpty()) {
14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *firstStarterInSrc=findNextCompBoundary(src, limit);
14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(src!=firstStarterInSrc) {
14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                                    buffer.getLimit());
14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeString middle(lastStarterInDest, destSuffixLength);
14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buffer.removeSuffix(destSuffixLength);
14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            safeMiddle=middle;
14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            middle.append(src, (int32_t)(firstStarterInSrc-src));
14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const UChar *middleStart=middle.getBuffer();
14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            compose(middleStart, middleStart+middle.length(), onlyContiguous,
14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    TRUE, buffer, errorCode);
14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U_FAILURE(errorCode)) {
14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return;
14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            src=firstStarterInSrc;
14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(doCompose) {
14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            limit=u_strchr(src, 0);
14246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        buffer.appendZeroCC(src, limit, errorCode);
14266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
14306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Does c have a composition boundary before it?
14316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * True if its decomposition begins with a character that has
14326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
14336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
14346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (isCompYesAndZeroCC()) so we need not decompose.
14356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
14366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
14376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
14386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(isCompYesAndZeroCC(norm16)) {
14396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return TRUE;
14406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isMaybeOrNonZeroCC(norm16)) {
14416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
14426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isDecompNoAlgorithmic(norm16)) {
14436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=mapAlgorithmic(c, norm16);
14446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            norm16=getNorm16(c);
14456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
14466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c decomposes, get everything from the variable-length extra data
14476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const uint16_t *mapping=getMapping(norm16);
14486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint16_t firstUnit=*mapping;
14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;
14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD) && (*(mapping-1)&0xff00)) {
14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return FALSE;  // non-zero leadCC
14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t i=1;  // skip over the firstUnit
14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 c;
14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            U16_NEXT_UNSAFE(mapping, i, c);
14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return isCompYesAndZeroCC(getNorm16(c));
14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const {
14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint16_t norm16=getNorm16(c);
14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(isInert(norm16)) {
14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return TRUE;
14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(norm16<=minYesNo) {
14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Hangul: norm16==minYesNo
14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Hangul LVT has a boundary after it.
14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Hangul LV and non-inert yesYes characters combine forward.
14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return isHangul(norm16) && !Hangul::isHangulWithoutJamoT((UChar)c);
14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) {
14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FALSE;
14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isDecompNoAlgorithmic(norm16)) {
14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=mapAlgorithmic(c, norm16);
14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c decomposes, get everything from the variable-length extra data.
14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // If testInert, then c must be a yesNo character which has lccc=0,
14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // otherwise it could be a noNo.
14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const uint16_t *mapping=getMapping(norm16);
14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint16_t firstUnit=*mapping;
14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // TRUE if
14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   not MAPPING_NO_COMP_BOUNDARY_AFTER
14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //     (which is set if
14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //       c is not deleted, and
14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //       it and its decomposition do not combine forward, and it has a starter)
14886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            //   and if FCC then trailCC<=1
14896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return
14906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (firstUnit&MAPPING_NO_COMP_BOUNDARY_AFTER)==0 &&
14916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (!onlyContiguous || firstUnit<=0x1ff);
14926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
14936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
14946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
14956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
14966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p) const {
14976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    BackwardUTrie2StringIterator iter(normTrie, start, p);
14986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16;
14996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    do {
15006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        norm16=iter.previous16();
15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } while(!hasCompBoundaryBefore(iter.codePoint, norm16));
15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // but that's probably not worth the extra cost.
15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return iter.codePointStart;
15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit) const {
15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ForwardUTrie2StringIterator iter(normTrie, p, limit);
15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t norm16;
15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    do {
15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        norm16=iter.next16();
15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } while(!hasCompBoundaryBefore(iter.codePoint, norm16));
15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return iter.codePointStart;
15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Note: normalizer2impl.cpp r30982 (2011-nov-27)
15176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// still had getFCDTrie() which built and cached an FCD trie.
15186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// That provided faster access to FCD data than getFCD16FromNormData()
15196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// but required synchronization and consumed some 10kB of heap memory
15206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// in any process that uses FCD (e.g., via collation).
15216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// tccc180[] and smallFCD[] are intended to help with any loss of performance,
15226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// at least for Latin & CJK.
15236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Gets the FCD value from the regular normalization data.
15256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
15266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Only loops for 1:1 algorithmic mappings.
15276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
15286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint16_t norm16=getNorm16(c);
15296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(norm16<=minYesNo) {
15306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // no decomposition or Hangul syllable, all zeros
15316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
15326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(norm16>=MIN_NORMAL_MAYBE_YES) {
15336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // combining mark
15346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            norm16&=0xff;
15356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return norm16|(norm16<<8);
15366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(norm16>=minMaybeYes) {
15376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
15386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(isDecompNoAlgorithmic(norm16)) {
15396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=mapAlgorithmic(c, norm16);
15406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
15416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c decomposes, get everything from the variable-length extra data
15426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const uint16_t *mapping=getMapping(norm16);
15436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint16_t firstUnit=*mapping;
15446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((firstUnit&MAPPING_LENGTH_MASK)==0) {
15456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // A character that is deleted (maps to an empty string) must
15466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // get the worst-case lccc and tccc values because arbitrary
15476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // characters on both sides will become adjacent.
15486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return 0x1ff;
15496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
15506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                norm16=firstUnit>>8;  // tccc
15516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
15526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    norm16|=*(mapping-1)&0xff00;  // lccc
15536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
15546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return norm16;
15556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
15566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
15596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Dual functionality:
15616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// buffer!=NULL: normalize
15626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
15636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *
15646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
15656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         ReorderingBuffer *buffer,
15666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UErrorCode &errorCode) const {
15676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
15686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Similar to the prevBoundary in the compose() implementation.
15696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevBoundary=src;
15706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t prevFCD16=0;
15716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(limit==NULL) {
15726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src=copyLowPrefixFromNulTerminated(src, MIN_CCC_LCCC_CP, buffer, errorCode);
15736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(U_FAILURE(errorCode)) {
15746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return src;
15756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(prevBoundary<src) {
15776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevBoundary=src;
15786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // We know that the previous character's lccc==0.
15796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Fetching the fcd16 value was deferred for this below-U+0300 code point.
15806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevFCD16=getFCD16(*(src-1));
15816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(prevFCD16>1) {
15826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                --prevBoundary;
15836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
15846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
15856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        limit=u_strchr(src, 0);
15866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
15876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Note: In this function we use buffer->appendZeroCC() because we track
15896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // the lead and trail combining classes here, rather than leaving it to
15906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // the ReorderingBuffer.
15916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The exception is the call to decomposeShort() which uses the buffer
15926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // in the normal way.
15936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *prevSrc;
15956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c=0;
15966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t fcd16=0;
15976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
15986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(;;) {
15996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // count code units with lccc==0
16006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        for(prevSrc=src; src!=limit;) {
16016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((c=*src)<MIN_CCC_LCCC_CP) {
16026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevFCD16=~c;
16036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++src;
16046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
16056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevFCD16=0;
16066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++src;
16076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
16086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(U16_IS_SURROGATE(c)) {
16096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UChar c2;
16106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(U16_IS_SURROGATE_LEAD(c)) {
16116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
16126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            c=U16_GET_SUPPLEMENTARY(c, c2);
16136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
16146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else /* trail surrogate */ {
16156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
16166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            --src;
16176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            c=U16_GET_SUPPLEMENTARY(c2, c);
16186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
16196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
16206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
16216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if((fcd16=getFCD16FromNormData(c))<=0xff) {
16226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    prevFCD16=fcd16;
16236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    src+=U16_LENGTH(c);
16246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
16256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
16266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
16276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
16286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // copy these code units all at once
16306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(src!=prevSrc) {
16316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) {
16326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
16336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
16346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(src==limit) {
16356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
16366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
16376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevBoundary=src;
16386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // We know that the previous character's lccc==0.
16396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(prevFCD16<0) {
16406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Fetching the fcd16 value was deferred for this below-U+0300 code point.
16416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UChar32 prev=~prevFCD16;
16426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
16436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(prevFCD16>1) {
16446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    --prevBoundary;
16456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
16466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
16476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const UChar *p=src-1;
16486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) {
16496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    --p;
16506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Need to fetch the previous character's FCD value because
16516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // prevFCD16 was just for the trail surrogate code point.
16526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));
16536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
16546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
16556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(prevFCD16>1) {
16566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    prevBoundary=p;
16576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
16586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
16596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // The start of the current character (c).
16606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevSrc=src;
16616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(src==limit) {
16626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
16636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
16646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
16656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        src+=U16_LENGTH(c);
16666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
16676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Check for proper order, and decompose locally if necessary.
16686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if((prevFCD16&0xff)<=(fcd16>>8)) {
16696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // proper order: prev tccc <= current lccc
16706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((fcd16&0xff)<=1) {
16716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                prevBoundary=src;
16726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
16736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) {
16746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
16756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
16766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevFCD16=fcd16;
16776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
16786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(buffer==NULL) {
16796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return prevBoundary;  // quick check "no"
16806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
16816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
16826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Back out the part of the source that we copied or appended
16836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * already but is now going to be decomposed.
16846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * prevSrc is set to after what was copied/appended.
16856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
16866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
16876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
16886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Find the part of the source that needs to be decomposed,
16896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * up to the next safe boundary.
16906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
16916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            src=findNextFCDBoundary(src, limit);
16926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
16936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * The source text does not fulfill the conditions for FCD.
16946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Decompose and reorder a limited piece of the text.
16956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
16966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(!decomposeShort(prevBoundary, src, *buffer, errorCode)) {
16976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
16986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
16996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevBoundary=src;
17006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            prevFCD16=0;
17016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return src;
17046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit,
17076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       UBool doMakeFCD,
17086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       UnicodeString &safeMiddle,
17096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       ReorderingBuffer &buffer,
17106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       UErrorCode &errorCode) const {
17116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(!buffer.isEmpty()) {
17126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
17136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(src!=firstBoundaryInSrc) {
17146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
17156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                                    buffer.getLimit());
17166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
17176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeString middle(lastBoundaryInDest, destSuffixLength);
17186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            buffer.removeSuffix(destSuffixLength);
17196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            safeMiddle=middle;
17206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            middle.append(src, (int32_t)(firstBoundaryInSrc-src));
17216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const UChar *middleStart=middle.getBuffer();
17226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
17236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U_FAILURE(errorCode)) {
17246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return;
17256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
17266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            src=firstBoundaryInSrc;
17276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(doMakeFCD) {
17306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        makeFCD(src, limit, &buffer, errorCode);
17316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
17326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(limit==NULL) {  // appendZeroCC() needs limit!=NULL
17336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            limit=u_strchr(src, 0);
17346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        buffer.appendZeroCC(src, limit, errorCode);
17366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const {
17406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(start<p && previousFCD16(start, p)>0xff) {}
17416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return p;
17426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const {
17456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(p<limit) {
17466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const UChar *codePointStart=p;
17476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(nextFCD16(p, limit)<=0xff) {
17486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return codePointStart;
17496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return p;
17526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// CanonicalIterator data -------------------------------------------------- ***
17556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCanonIterData::CanonIterData(UErrorCode &errorCode) :
17576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        trie(utrie2_open(0, 0, &errorCode)),
17586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
17596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCanonIterData::~CanonIterData() {
17616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utrie2_close(trie);
17626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
17656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t canonValue=utrie2_get32(trie, decompLead);
17666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
17676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // origin is the first character whose decomposition starts with
17686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // the character for which we are setting the value.
17696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utrie2_set32(trie, decompLead, canonValue|origin, &errorCode);
17706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
17716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // origin is not the first character, or it is U+0000.
17726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeSet *set;
17736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if((canonValue&CANON_HAS_SET)==0) {
17746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set=new UnicodeSet;
17756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(set==NULL) {
17766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errorCode=U_MEMORY_ALLOCATION_ERROR;
17776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return;
17786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
17796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
17806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
17816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utrie2_set32(trie, decompLead, canonValue, &errorCode);
17826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            canonStartSets.addElement(set, errorCode);
17836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(firstOrigin!=0) {
17846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                set->add(firstOrigin);
17856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
17866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
17876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)];
17886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
17896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set->add(origin);
17906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
17916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
17926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN
17946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
17956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
17966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//     context: the Normalizer2Impl
17976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV
17986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgenumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
17996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode errorCode = U_ZERO_ERROR;
18006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (value != 0) {
18016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        Normalizer2Impl *impl = (Normalizer2Impl *)context;
18026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        impl->makeCanonIterDataFromNorm16(
18036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            start, end, (uint16_t)value, *impl->fCanonIterData, errorCode);
18046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return U_SUCCESS(errorCode);
18066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// UInitOnce instantiation function for CanonIterData
18116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV
18136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orginitCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
18146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_ASSERT(impl->fCanonIterData == NULL);
18156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    impl->fCanonIterData = new CanonIterData(errorCode);
18166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (impl->fCanonIterData == NULL) {
18176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errorCode=U_MEMORY_ALLOCATION_ERROR;
18186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_SUCCESS(errorCode)) {
18206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utrie2_enum(impl->getNormTrie(), NULL, enumCIDRangeHandler, impl);
18216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode);
18226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(errorCode)) {
18246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete impl->fCanonIterData;
18256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        impl->fCanonIterData = NULL;
18266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
18286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END
18306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
18316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
18326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                  CanonIterData &newData,
18336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                  UErrorCode &errorCode) const {
18346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(norm16==0 || (minYesNo<=norm16 && norm16<minNoNo)) {
18356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Inert, or 2-way mapping (including Hangul syllable).
18366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // We do not write a canonStartSet for any yesNo character.
18376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Composites from 2-way mappings are added at runtime from the
18386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // starter's compositions list, and the other characters in
18396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
18406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // "maybe" characters.
18416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
18426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
18436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(UChar32 c=start; c<=end; ++c) {
18446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint32_t oldValue=utrie2_get32(newData.trie, c);
18456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint32_t newValue=oldValue;
18466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(norm16>=minMaybeYes) {
18476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // not a segment starter if it occurs in a decomposition or has cc!=0
18486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            newValue|=CANON_NOT_SEGMENT_STARTER;
18496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(norm16<MIN_NORMAL_MAYBE_YES) {
18506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                newValue|=CANON_HAS_COMPOSITIONS;
18516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
18526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(norm16<minYesNo) {
18536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            newValue|=CANON_HAS_COMPOSITIONS;
18546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
18556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // c has a one-way decomposition
18566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 c2=c;
18576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uint16_t norm16_2=norm16;
18586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) {
18596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c2=mapAlgorithmic(c2, norm16_2);
18606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                norm16_2=getNorm16(c2);
18616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
18626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(minYesNo<=norm16_2 && norm16_2<limitNoNo) {
18636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // c decomposes, get everything from the variable-length extra data
18646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const uint16_t *mapping=getMapping(norm16_2);
18656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uint16_t firstUnit=*mapping;
18666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int32_t length=firstUnit&MAPPING_LENGTH_MASK;
18676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
18686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(c==c2 && (*(mapping-1)&0xff)!=0) {
18696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
18706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
18716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
18726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // Skip empty mappings (no characters in the decomposition).
18736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(length!=0) {
18746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++mapping;  // skip over the firstUnit
18756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // add c to first code point's start set
18766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t i=0;
18776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    U16_NEXT_UNSAFE(mapping, i, c2);
18786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    newData.addToStartSet(c, c2, errorCode);
18796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
18806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // one-way mapping. A 2-way mapping is possible here after
18816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    // intermediate algorithmic mapping.
18826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(norm16_2>=minNoNo) {
18836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        while(i<length) {
18846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            U16_NEXT_UNSAFE(mapping, i, c2);
18856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            uint32_t c2Value=utrie2_get32(newData.trie, c2);
18866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
18876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                utrie2_set32(newData.trie, c2, c2Value|CANON_NOT_SEGMENT_STARTER,
18886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                             &errorCode);
18896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            }
18906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        }
18916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
18926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
18936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
18946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                // c decomposed to c2 algorithmically; c has cc==0
18956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                newData.addToStartSet(c, c2, errorCode);
18966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
18976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
18986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(newValue!=oldValue) {
18996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            utrie2_set32(newData.trie, c, newValue, &errorCode);
19006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
19016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
19056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Logically const: Synchronized instantiation.
19066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this);
19076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode);
19086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return U_SUCCESS(errorCode);
19096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
19126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (int32_t)utrie2_get32(fCanonIterData->trie, c);
19136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
19166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return *(const UnicodeSet *)fCanonIterData->canonStartSets[n];
19176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const {
19206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return getCanonValue(c)>=0;
19216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
19246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER;
19256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(canonValue==0) {
19266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
19276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    set.clear();
19296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t value=canonValue&CANON_VALUE_MASK;
19306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((canonValue&CANON_HAS_SET)!=0) {
19316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set.addAll(getCanonStartSet(value));
19326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(value!=0) {
19336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        set.add(value);
19346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
19366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint16_t norm16=getNorm16(c);
19376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(norm16==JAMO_L) {
19386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UChar32 syllable=
19396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
19406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);
19416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
19426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            addComposites(getCompositionsList(norm16), set);
19436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
19446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return TRUE;
19466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
19476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
19496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Normalizer2 data swapping ----------------------------------------------- ***
19516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_USE
19536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2
19556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm2_swap(const UDataSwapper *ds,
19566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            const void *inData, int32_t length, void *outData,
19576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UErrorCode *pErrorCode) {
19586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UDataInfo *pInfo;
19596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t headerSize;
19606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint8_t *inBytes;
19626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t *outBytes;
19636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const int32_t *inIndexes;
19656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t indexes[Normalizer2Impl::IX_MIN_MAYBE_YES+1];
19666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i, offset, nextOffset, size;
19686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* udata_swapDataHeader checks the arguments */
19706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
19716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
19726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
19736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* check data format and format version */
19766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pInfo=(const UDataInfo *)((const char *)inData+4);
19776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(!(
19786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Nrm2" */
19796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->dataFormat[1]==0x72 &&
19806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->dataFormat[2]==0x6d &&
19816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pInfo->dataFormat[3]==0x32 &&
19826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (pInfo->formatVersion[0]==1 || pInfo->formatVersion[0]==2)
19836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    )) {
19846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
19856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         pInfo->dataFormat[0], pInfo->dataFormat[1],
19866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         pInfo->dataFormat[2], pInfo->dataFormat[3],
19876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         pInfo->formatVersion[0]);
19886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *pErrorCode=U_UNSUPPORTED_ERROR;
19896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0;
19906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
19916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    inBytes=(const uint8_t *)inData+headerSize;
19936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    outBytes=(uint8_t *)outData+headerSize;
19946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    inIndexes=(const int32_t *)inBytes;
19966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
19976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length>=0) {
19986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        length-=headerSize;
19996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(length<(int32_t)sizeof(indexes)) {
20006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
20016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             length);
20026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
20036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
20046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* read the first few indexes */
20086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i=0; i<=Normalizer2Impl::IX_MIN_MAYBE_YES; ++i) {
20096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        indexes[i]=udata_readInt32(ds, inIndexes[i]);
20106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* get the total length of the data */
20136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    size=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
20146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(length>=0) {
20166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(length<size) {
20176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n",
20186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             length);
20196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
20206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return 0;
20216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* copy the data for inaccessible bytes */
20246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(inBytes!=outBytes) {
20256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            uprv_memcpy(outBytes, inBytes, size);
20266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
20276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        offset=0;
20296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* swap the int32_t indexes[] */
20316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET];
20326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
20336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        offset=nextOffset;
20346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* swap the UTrie2 */
20366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
20376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
20386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        offset=nextOffset;
20396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* swap the uint16_t extraData[] */
20416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET];
20426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
20436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        offset=nextOffset;
20446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */
20466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];
20476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        offset=nextOffset;
20486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U_ASSERT(offset==size);
20506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
20516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return headerSize+size;
20536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
20546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
20556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif  // !UCONFIG_NO_NORMALIZATION
2056