18393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/*
28393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ******************************************************************************
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius *   Copyright (C) 1996-2014, International Business Machines
4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius *   Corporation and others.  All Rights Reserved.
58393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ******************************************************************************
68393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius */
78393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
88393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/utypes.h"
98393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#if !UCONFIG_NO_COLLATION
118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/unistr.h"
138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/usearch.h"
148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "cmemory.h"
168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/coll.h"
178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/tblcoll.h"
188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/coleitr.h"
198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/ucoleitr.h"
208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/regex.h"        // TODO: make conditional on regexp being built.
228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uniset.h"
248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uset.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/usetiter.h"
268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/ustring.h"
278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "hash.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h"
298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "uhash.h"
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "usrchimp.h"
318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "uassert.h"
328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "colldata.h"
348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
// Element count of a true array (not a pointer). The argument is
// parenthesized before indexing so any array-valued expression is safe.
#define ARRAY_SIZE(array) (sizeof(array)/sizeof((array)[0]))
// Raw storage for `count` objects of `type` obtained from uprv_malloc; no
// constructors run. The expansion is wrapped in parentheses so it can be
// used inside larger expressions. Callers in this file check the result
// against NULL.
#define NEW_ARRAY(type, count) ((type *) uprv_malloc((count) * sizeof(type)))
// Releases storage obtained with NEW_ARRAY.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
// Copies `count` elements from src to dst; the element size is taken from src.
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusCEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UCollationStrength strength = ucol_getStrength(coll);
458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==  UCOL_SHIFTED;
468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uint32_t variableTop = ucol_getVariableTop(coll, &status);
478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uint32_t strengthMask = 0;
488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t order;
498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // **** only set flag if string has Han(gul) ****
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // ucol_forceHanImplicit(elems, &status); -- removed for ticket #10476
568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    switch (strength)
588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    {
598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    default:
608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        strengthMask |= UCOL_TERTIARYORDERMASK;
618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        /* fall through */
628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    case UCOL_SECONDARY:
648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        strengthMask |= UCOL_SECONDARYORDERMASK;
658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        /* fall through */
668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    case UCOL_PRIMARY:
688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        strengthMask |= UCOL_PRIMARYORDERMASK;
698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ces = ceBuffer;
728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UBool cont = isContinuation(order);
758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        order &= strengthMask;
778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (toShift && variableTop > (uint32_t)order && (order & UCOL_PRIMARYORDERMASK) != 0) {
798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (strength >= UCOL_QUATERNARY) {
808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                order &= UCOL_PRIMARYORDERMASK;
818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            } else {
828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                order = UCOL_IGNORABLE;
838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (order == UCOL_IGNORABLE) {
878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            continue;
888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (cont) {
918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            order |= UCOL_CONTINUATION_MARKER;
928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        add(order, status);
958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ucol_closeElements(elems);
988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusCEList::~CEList()
1018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (ces != ceBuffer) {
1038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        DELETE_ARRAY(ces);
1048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid CEList::add(uint32_t ce, UErrorCode &status)
1088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
1108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
1118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (listSize >= listMax) {
1148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        int32_t newMax = listMax + CELIST_BUFFER_SIZE;
1158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax);
1168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (newCEs == NULL) {
1188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
1198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return;
1208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        uprv_memcpy(newCEs, ces, listSize * sizeof(uint32_t));
1238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (ces != ceBuffer) {
1258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            DELETE_ARRAY(ces);
1268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ces = newCEs;
1298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        listMax = newMax;
1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ces[listSize++] = ce;
1338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusuint32_t CEList::get(int32_t index) const
1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (index >= 0 && index < listSize) {
1388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return ces[index];
1398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return (uint32_t)UCOL_NULLORDER;
1428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusuint32_t &CEList::operator[](int32_t index) const
1458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return ces[index];
1478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUBool CEList::matchesAt(int32_t offset, const CEList *other) const
1508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (other == NULL || listSize - offset < other->size()) {
1528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return FALSE;
1538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for (int32_t i = offset, j = 0; j < other->size(); i += 1, j += 1) {
1568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (ces[i] != (*other)[j]) {
1578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return FALSE;
1588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
1598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return TRUE;
1628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t CEList::size() const
1658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return listSize;
1678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusStringList::StringList(UErrorCode &status)
1708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    : strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0)
1718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    strings = new UnicodeString [listMax];
1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (strings == NULL) {
1798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
1808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
1818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusStringList::~StringList()
1858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete[] strings;
1878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
1898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid StringList::add(const UnicodeString *string, UErrorCode &status)
1908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
1918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
1928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
1938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (listSize >= listMax) {
1958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE;
1968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UnicodeString *newStrings = new UnicodeString[newMax];
1978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (newStrings == NULL) {
1988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
1998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return;
2008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        for (int32_t i=0; i<listSize; ++i) {
2028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            newStrings[i] = strings[i];
2038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        delete[] strings;
2058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        strings = newStrings;
2068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        listMax = newMax;
2078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // The ctor initialized all the strings in
2108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // the array to empty strings, so this
2118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // is the same as copying the source string.
2128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    strings[listSize++].append(*string);
2138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid StringList::add(const UChar *chars, int32_t count, UErrorCode &status)
2168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
2178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    const UnicodeString string(chars, count);
2188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    add(&string, status);
2208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst UnicodeString *StringList::get(int32_t index) const
2238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
2248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (index >= 0 && index < listSize) {
2258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return &strings[index];
2268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return NULL;
2298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t StringList::size() const
2328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
2338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return listSize;
2348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_CDECL_BEGIN
2388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic void U_CALLCONV
2398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusdeleteStringList(void *obj)
2408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
2418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    StringList *strings = (StringList *) obj;
2428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete strings;
2448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_CDECL_END
2468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusclass CEToStringsMap
2488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
2498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliuspublic:
2508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    CEToStringsMap(UErrorCode &status);
2518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ~CEToStringsMap();
2528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    void put(uint32_t ce, UnicodeString *string, UErrorCode &status);
2548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    StringList *getStringList(uint32_t ce) const;
2558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusprivate:
2578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status);
2588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UHashtable *map;
2598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius};
2608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusCEToStringsMap::CEToStringsMap(UErrorCode &status)
2628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    : map(NULL)
2638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
2648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
2658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
2668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    map = uhash_open(uhash_hashLong, uhash_compareLong,
2698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                     uhash_compareCaselessUnicodeString,
2708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                     &status);
2718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
2738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
2748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_setValueDeleter(map, deleteStringList);
2778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusCEToStringsMap::~CEToStringsMap()
2808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
2818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_close(map);
2828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
2838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid CEToStringsMap::put(uint32_t ce, UnicodeString *string, UErrorCode &status)
2858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
2868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    StringList *strings = getStringList(ce);
2878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (strings == NULL) {
2898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        strings = new StringList(status);
2908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (strings == NULL || U_FAILURE(status)) {
2928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
2938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return;
2948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
2958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        putStringList(ce, strings, status);
2978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
2988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
2998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    strings->add(string, status);
3008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
3018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusStringList *CEToStringsMap::getStringList(uint32_t ce) const
3038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
3048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return (StringList *) uhash_iget(map, ce);
3058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
3068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCode &status)
3088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
3098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uhash_iput(map, ce, (void *) stringList, &status);
3108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
3118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#define CLONE_COLLATOR
3138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusCollData::CollData(UCollator *collator, UErrorCode &status)
3158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    : coll(NULL), ceToCharsStartingWith(NULL)
3168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
3178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]]
3188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // i.e. other, control, private use, format, surrogate
3198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    U_STRING_DECL(test_pattern, "[[:assigned:]-[:c:]]", 20);
3208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    U_STRING_INIT(test_pattern, "[[:assigned:]-[:c:]]", 20);
3218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USet *charsToTest = uset_openPattern(test_pattern, 20, &status);
3228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Han ext. A, Han, Jamo, Hangul, Han Ext. B
    // i.e. all the characters we handle implicitly
3258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    U_STRING_DECL(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
3268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    U_STRING_INIT(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
3278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USet *charsToRemove = uset_openPattern(remove_pattern, 70, &status);
3288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
3308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
3318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
3328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USet *expansions   = uset_openEmpty();
3348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    USet *contractions = uset_openEmpty();
3358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t itemCount;
3368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ceToCharsStartingWith = new CEToStringsMap(status);
3388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
3408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        goto bail;
3418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
3428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#ifdef CLONE_COLLATOR
3448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    coll = ucol_safeClone(collator, NULL, NULL, &status);
3458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
3478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        goto bail;
3488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
3498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#else
3508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    coll = collator;
3518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif
3528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
3548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uset_addAll(charsToTest, contractions);
3568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uset_addAll(charsToTest, expansions);
3578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uset_removeAll(charsToTest, charsToRemove);
3588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    itemCount = uset_getItemCount(charsToTest);
3608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for(int32_t item = 0; item < itemCount; item += 1) {
3618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UChar32 start = 0, end = 0;
3628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        UChar buffer[16];
3638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        int32_t len = uset_getItem(charsToTest, item, &start, &end,
3648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                                   buffer, 16, &status);
3658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (len == 0) {
3678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            for (UChar32 ch = start; ch <= end; ch += 1) {
3688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                UnicodeString *st = new UnicodeString(ch);
3698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                if (st == NULL) {
3718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    status = U_MEMORY_ALLOCATION_ERROR;
3728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    break;
3738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                }
3748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                CEList *ceList = new CEList(coll, *st, status);
3768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                ceToCharsStartingWith->put(ceList->get(0), st, status);
3788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                delete ceList;
3808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                delete st;
3818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
3828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        } else if (len > 0) {
3838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            UnicodeString *st = new UnicodeString(buffer, len);
3848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (st == NULL) {
3868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                status = U_MEMORY_ALLOCATION_ERROR;
3878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                break;
3888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
3898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            CEList *ceList = new CEList(coll, *st, status);
3918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            ceToCharsStartingWith->put(ceList->get(0), st, status);
3938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
3948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            delete ceList;
3958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            delete st;
3968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        } else {
3978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // shouldn't happen...
3988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
3998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (U_FAILURE(status)) {
4018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius             break;
4028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
4038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
4048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusbail:
4068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uset_close(contractions);
4078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uset_close(expansions);
4088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uset_close(charsToRemove);
4098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uset_close(charsToTest);
4108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
4128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return;
4138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
4148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet hanRanges(UNICODE_STRING_SIMPLE("[:Unified_Ideograph:]"), status);
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(status)) {
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSetIterator hanIter(hanRanges);
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString hanString;
421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(hanIter.nextRange()) {
422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        hanString.append(hanIter.getCodepoint());
423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        hanString.append(hanIter.getCodepointEnd());
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // TODO: Why U+11FF? The old code had an outdated UCOL_LAST_T_JAMO=0x11F9,
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // but as of Unicode 6.3 the 11xx block is filled,
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // and there are also more Jamo T at U+D7CB..U+D7FB.
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Maybe use [:HST=T:] and look for the end of the last range?
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Maybe use script boundary mappings instead of this code??
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar  jamoRanges[] = {Hangul::JAMO_L_BASE, Hangul::JAMO_V_BASE, Hangul::JAMO_T_BASE + 1, 0x11FF};
4318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges));
4328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     CEList hanList(coll, hanString, status);
4338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     CEList jamoList(coll, jamoString, status);
4348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     int32_t j = 0;
4358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     if (U_FAILURE(status)) {
4378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         return;
4388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     }
4398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     for (int32_t c = 0; c < jamoList.size(); c += 1) {
4418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         uint32_t jce = jamoList[c];
4428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         if (! isContinuation(jce)) {
4448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius             jamoLimits[j++] = jce;
4458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         }
4468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     }
4478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     jamoLimits[3] += (1 << UCOL_PRIMARYORDERSHIFT);
4498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     minHan = 0xFFFFFFFF;
4518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     maxHan = 0;
4528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     for(int32_t h = 0; h < hanList.size(); h += 2) {
4548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         uint32_t han = (uint32_t) hanList[h];
4558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         if (han < minHan) {
4578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius             minHan = han;
4588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         }
4598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         if (han > maxHan) {
4618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius             maxHan = han;
4628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius         }
4638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     }
4648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius     maxHan += (1 << UCOL_PRIMARYORDERSHIFT);
4668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
4678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusCollData::~CollData()
4698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
4708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#ifdef CLONE_COLLATOR
4718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius   ucol_close(coll);
4728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif
4738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius   delete ceToCharsStartingWith;
4758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
4768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUCollator *CollData::getCollator() const
4788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
4798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return coll;
4808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
4818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst StringList *CollData::getStringList(int32_t ce) const
4838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
4848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return ceToCharsStartingWith->getStringList(ce);
4858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
4868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusconst CEList *CollData::getCEList(const UnicodeString *string) const
4888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
4898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    UErrorCode status = U_ZERO_ERROR;
4908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    const CEList *list = new CEList(coll, *string, status);
4918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (U_FAILURE(status)) {
4938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        delete list;
4948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        list = NULL;
4958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
4968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
4978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return list;
4988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
4998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid CollData::freeCEList(const CEList *list)
5018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
5028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete list;
5038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
5048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const
5068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
5078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // find out shortest string for the longest sequence of ces.
5088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // this can probably be folded with the minLengthCache...
5098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (history[offset] >= 0) {
5118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return history[offset];
5128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
5138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    uint32_t ce = ceList->get(offset);
5158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t maxOffset = ceList->size();
5168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t shortestLength = INT32_MAX;
5178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    const StringList *strings = ceToCharsStartingWith->getStringList(ce);
5188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (strings != NULL) {
5208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        int32_t stringCount = strings->size();
5218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        for (int32_t s = 0; s < stringCount; s += 1) {
5238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            const UnicodeString *string = strings->get(s);
5248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            UErrorCode status = U_ZERO_ERROR;
5258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            const CEList *ceList2 = new CEList(coll, *string, status);
5268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (U_FAILURE(status)) {
5288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                delete ceList2;
5298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                ceList2 = NULL;
5308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
5318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (ceList->matchesAt(offset, ceList2)) {
5338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                U_ASSERT(ceList2 != NULL);
5348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                int32_t clength = ceList2->size();
5358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                int32_t slength = string->length();
5368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                int32_t roffset = offset + clength;
5378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                int32_t rlength = 0;
5388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                if (roffset < maxOffset) {
5408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    rlength = minLengthInChars(ceList, roffset, history);
5418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    if (rlength <= 0) {
5438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    // delete before continue to avoid memory leak.
5448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        delete ceList2;
5458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        // ignore any dead ends
5478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        continue;
5488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    }
5498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                }
5508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                if (shortestLength > slength + rlength) {
5528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    shortestLength = slength + rlength;
5538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                }
5548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
5558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            delete ceList2;
5578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
5588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
5598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (shortestLength == INT32_MAX) {
5618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // No matching strings at this offset. See if
5628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // the CE is in a range we can handle manually.
5638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if (ce >= minHan && ce < maxHan) {
5648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // all han have implicit orders which
5658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // generate two CEs.
5668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            int32_t roffset = offset + 2;
5678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            int32_t rlength = 0;
5688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius          //history[roffset++] = -1;
5708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius          //history[roffset++] = 1;
5718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (roffset < maxOffset) {
5738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                rlength = minLengthInChars(ceList, roffset, history);
5748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
5758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (rlength < 0) {
5778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                return -1;
5788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
5798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5808393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            shortestLength = 1 + rlength;
5818393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            goto have_shortest;
5828393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        } else if (ce >= jamoLimits[0] && ce < jamoLimits[3]) {
5838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            int32_t roffset = offset;
5848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            int32_t rlength = 0;
5858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            // **** this loop may not handle archaic Hangul correctly ****
5878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            for (int32_t j = 0; roffset < maxOffset && j < 4; j += 1, roffset += 1) {
5888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                uint32_t jce = ceList->get(roffset);
5898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // Some Jamo have 24-bit primary order; skip the
5918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // 2nd CE. This should always be OK because if
5928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // we're still in the loop all we've seen are
5938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // a series of Jamo in LVT order.
5948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                if (isContinuation(jce)) {
5958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    continue;
5968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                }
5978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
5988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                if (j >= 3 || jce < jamoLimits[j] || jce >= jamoLimits[j + 1]) {
5998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    break;
6008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                }
6018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
6028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (roffset == offset) {
6048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // we started with a non-L Jamo...
6058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // just say it comes from a single character
6068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                roffset += 1;
6078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                // See if the single Jamo has a 24-bit order.
6098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                if (roffset < maxOffset && isContinuation(ceList->get(roffset))) {
6108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    roffset += 1;
6118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                }
6128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
6138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (roffset < maxOffset) {
6158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                rlength = minLengthInChars(ceList, roffset, history);
6168393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
6178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6188393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if (rlength < 0) {
6198393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                return -1;
6208393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            }
6218393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6228393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            shortestLength = 1 + rlength;
6238393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            goto have_shortest;
6248393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
6258393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6268393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // Can't handle it manually either. Just move on.
6278393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        return -1;
6288393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
6298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliushave_shortest:
6318393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    history[offset] = shortestLength;
6328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return shortestLength;
6348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
6358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusint32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const
6378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius{
6388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t clength = ceList->size();
6398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t *history = NEW_ARRAY(int32_t, clength);
6408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    for (int32_t i = 0; i < clength; i += 1) {
6428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        history[i] = -1;
6438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
6448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    int32_t minLength = minLengthInChars(ceList, offset, history);
6468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    DELETE_ARRAY(history);
6488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return minLength;
6508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
6518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
6528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#endif // #if !UCONFIG_NO_COLLATION
653