1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
3f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* Copyright (C) 2009-2013, International Business Machines Corporation and
4f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* others. All Rights Reserved.
5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
10103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
11103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/alphaindex.h"
13f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/coleitr.h"
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/coll.h"
15f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/localpointer.h"
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/normalizer2.h"
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/tblcoll.h"
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ulocdata.h"
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uniset.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h"
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/usetiter.h"
22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
24f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "cmemory.h"
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cstring.h"
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h"
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uvector.h"
28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//#include <string>
30103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//#include <iostream>
31f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
32f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
33f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
36f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
38f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
39f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Prefix string for Chinese index buckets.
40f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
41f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
42f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UChar BASE[1] = { 0xFDD0 };
43f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst int32_t BASE_LENGTH = 1;
44f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
45f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
46f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                const UnicodeString &one, const UnicodeString &other);
47f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
48f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
51f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right);
52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right);
55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  UVector<Record *> support function, delete a Record.
57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void U_CALLCONV
58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoalphaIndex_deleteRecord(void *obj) {
59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete static_cast<AlphabeticIndex::Record *>(obj);
60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
62f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
64f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
65f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                           UErrorCode &errorCode) {
66f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
67f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (owned.isValid()) {
68f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return owned.orphan();
69f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
70f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString *p = new UnicodeString(s);
71f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (p == NULL) {
72f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
73f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
74f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return p;
75f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
77f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline UnicodeString *getString(const UVector &list, int32_t i) {
78f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<UnicodeString *>(list[i]);
79f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
81f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
82f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<AlphabeticIndex::Bucket *>(list[i]);
83f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
85f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
86f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<AlphabeticIndex::Record *>(list[i]);
87f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
88f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
89f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
90f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Like Java Collections.binarySearch(List, String, Comparator).
91f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *
92f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * @return the index>=0 where the item was found,
93f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *         or the index<0 for inserting the string at ~index in sorted order
94f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
95f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
96f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (list.size() == 0) { return ~0; }
97f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t start = 0;
98f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t limit = list.size();
99f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (;;) {
100f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t i = (start + limit) / 2;
101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UErrorCode errorCode = U_ZERO_ERROR;
103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UCollationResult cmp = coll.compare(s, *si, errorCode);
104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (cmp == UCOL_EQUAL) {
105f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return i;
106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (cmp < 0) {
107f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (i == start) {
108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return ~start;  // insert s before *si
109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
110f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            limit = i;
111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
112f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (i == start) {
113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return ~(start + 1);  // insert s after *si
114f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
115f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            start = i;
116f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
120f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// The BucketList is not in the anonymous namespace because only Clang
123f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// seems to support its use in other classes from there.
124f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// However, we also don't need U_I18N_API because it is not used from outside the i18n library.
125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusclass BucketList : public UObject {
126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliuspublic:
127f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    BucketList(UVector *bucketList, UVector *publicBucketList)
128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
129f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t displayIndex = 0;
130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < publicBucketList->size(); ++i) {
131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
133f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
135f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    virtual ~BucketList() {
136f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        delete bucketList_;
137f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (immutableVisibleList_ != bucketList_) {
138f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            delete immutableVisibleList_;
139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
142f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t getBucketCount() const {
143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return immutableVisibleList_->size();
144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
145f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
146f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
147f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                           UErrorCode &errorCode) {
148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // binary search
149f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t start = 0;
150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t limit = bucketList_->size();
151f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while ((start + 1) < limit) {
152f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t i = (start + limit) / 2;
153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
154f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UCollationResult nameVsBucket =
155f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
156f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (nameVsBucket < 0) {
157f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                limit = i;
158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            } else {
159f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                start = i;
160f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
164f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket = bucket->displayBucket_;
165f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
166f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bucket->displayIndex_;
167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
168f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
169f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    /** All of the buckets, visible and invisible. */
170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector *bucketList_;
171f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    /** Just the visible buckets. */
172f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector *immutableVisibleList_;
173f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius};
174f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
175f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
176f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete buckets_;
177f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete collatorPrimaryOnly_;
178f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
179f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t
181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketCount() const {
182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketCount();
183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t
186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketIndex(
187f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString &name, UErrorCode &errorCode) const {
188f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst AlphabeticIndex::Bucket *
192f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
193f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (0 <= index && index < buckets_->getBucketCount()) {
194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return icu::getBucket(*buckets_->immutableVisibleList_, index);
195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
200f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
201f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : inputList_(NULL),
202f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
203f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          maxLabelCount_(99),
204f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          initialLabels_(NULL), firstCharsInScripts_(NULL),
205f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          collator_(NULL), collatorPrimaryOnly_(NULL),
206f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          buckets_(NULL) {
207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    init(&locale, status);
208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
209f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
210f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
211f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
212f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : inputList_(NULL),
213f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          maxLabelCount_(99),
215f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          initialLabels_(NULL), firstCharsInScripts_(NULL),
216f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          collator_(collator), collatorPrimaryOnly_(NULL),
217f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          buckets_(NULL) {
218f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    init(NULL, status);
219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
220f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::~AlphabeticIndex() {
224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete collator_;
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete collatorPrimaryOnly_;
226f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete firstCharsInScripts_;
227f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete buckets_;
228f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete inputList_;
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete initialLabels_;
230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    initialLabels_->addAll(additions);
238f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
244f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    addIndexExemplars(locale, status);
245f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
250f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
251f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
252f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // In C++, the ImmutableIndex must own its copy of the BucketList,
253f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // even if it contains no records, for proper memory management.
254f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We could clone the buckets_ if they are not NULL,
255f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // but that would be worth it only if this method is called multiple times,
256f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // or called after using the old-style bucket iterator API.
257f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<RuleBasedCollator> coll(
259f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
260f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (immutableBucketList.isNull() || coll.isNull()) {
261f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
262f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
263f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
264f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
265f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (immIndex == NULL) {
266f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
267f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
268f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
269f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // The ImmutableIndex adopted its parameter objects.
270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    immutableBucketList.orphan();
271f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    coll.orphan();
272f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return immIndex;
273f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
274f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
276f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
280f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketCount();
281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
285f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status) || inputList_ == NULL) {
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
288f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return inputList_->size();
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
291f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
292f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
293f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return; }
294f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
295f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
296f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString &overflowBoundary =
297f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
298f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
299f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We make a sorted array of elements.
300f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Some of the input may be redundant.
301f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We filter out those cases.
303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSetIterator iter(*initialLabels_);
304f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (iter.next()) {
305f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString *item = &iter.getString();
306f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        LocalPointer<UnicodeString> ownedItem;
307f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UBool checkDistinct;
308f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t itemLength = item->length();
309f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (!item->hasMoreChar32Than(0, itemLength, 1)) {
310f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = FALSE;
311f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
312f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                item->charAt(itemLength - 2) != 0x2a) {
313f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Use a label if it is marked with one trailing star,
314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // even if the label string sorts the same when all contractions are suppressed.
315f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
316f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            item = ownedItem.getAlias();
317f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (item == NULL) {
318f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
319f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return;
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
321f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = FALSE;
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
323f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = TRUE;
324f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
325f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
326f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Ignore a primary-ignorable or non-alphabetic index character.
327f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
328f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Ignore an index characters that will land in the overflow bucket.
329f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (checkDistinct &&
330f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
331f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Ignore a multi-code point index character that does not sort distinctly
332f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // from the sequence of its separate characters.
333f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (insertionPoint < 0) {
336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                indexCharacters.insertElementAt(
337f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
338f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            } else {
339f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
340f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
341f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    indexCharacters.setElementAt(
342f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        ownedString(*item, ownedItem, errorCode), insertionPoint);
343f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
344f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
347f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return; }
348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
349f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // if the result is still too large, cut down to maxCount elements, by removing every nth element
350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
351f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t size = indexCharacters.size() - 1;
352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (size > maxLabelCount_) {
353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t count = 0;
354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t old = -1;
355f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < indexCharacters.size();) {
356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            ++count;
357f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t bump = count * maxLabelCount_ / size;
358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (bump == old) {
359f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                indexCharacters.removeElementAt(i);
360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                old = bump;
362f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                ++i;
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
366f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
368f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
369f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
370f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
371f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (!current.startsWith(BASE, BASE_LENGTH)) {
372f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return current;
373f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
374f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UChar rest = current.charAt(BASE_LENGTH);
375f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (0x2800 < rest && rest <= 0x28FF) { // stroke count
376f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t count = rest-0x2800;
377f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        temp.setTo((UChar)(0x30 + count % 10));
378f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (count >= 10) {
379f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            count /= 10;
380f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            temp.insert(0, (UChar)(0x30 + count % 10));
381f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (count >= 10) {
382f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                count /= 10;
383f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                temp.insert(0, (UChar)(0x30 + count));
384f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
385f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
386f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return temp.append((UChar)0x5283);
387f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
388f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return temp.setTo(current, BASE_LENGTH);
389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
391f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool hasMultiplePrimaryWeights(
392f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        CollationElementIterator &cei, int32_t variableTop,
393f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString &s, UErrorCode &errorCode) {
394f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    cei.setText(s, errorCode);
395f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool seenPrimary = FALSE;
396f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (;;) {
397f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t ce32 = cei.next(errorCode);
398f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (ce32 == CollationElementIterator::NULLORDER) {
399f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            break;
400f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
401f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t p = CollationElementIterator::primaryOrder(ce32);
402f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (p > variableTop && (ce32 & 0xc0) != 0xc0) {
403f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // not primary ignorable, and not a continuation CE
404f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (seenPrimary) {
405f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return TRUE;
406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
407f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            seenPrimary = TRUE;
408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
409f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
410f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return FALSE;
411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
413f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
415f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusBucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
416f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Initialize indexCharacters.
417f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector indexCharacters(errorCode);
418f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    indexCharacters.setDeleter(uprv_deleteUObject);
419f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initLabels(indexCharacters, errorCode);
420f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
421f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
422f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Variables for hasMultiplePrimaryWeights().
423f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<CollationElementIterator> cei(
424f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        collatorPrimaryOnly_->createCollationElementIterator(emptyString_));
425f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (cei.isNull()) {
426f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
427f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
428f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
429f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t variableTop;
430f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
431f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        variableTop = CollationElementIterator::primaryOrder(
432f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            (int32_t)collatorPrimaryOnly_->getVariableTop(errorCode));
433f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
434f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        variableTop = 0;
435f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
436f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool hasInvisibleBuckets = FALSE;
437f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
438f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Helper arrays for Chinese Pinyin collation.
439f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *asciiBuckets[26] = {
440f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
441f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
442f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    };
443f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *pinyinBuckets[26] = {
444f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
445f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
446f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    };
447f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool hasPinyin = FALSE;
448f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
449f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<UVector> bucketList(new UVector(errorCode));
450f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketList.isNull()) {
451f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
452f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
453f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
454f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->setDeleter(uprv_deleteUObject);
455f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
456f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // underflow bucket
457f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
458f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucket == NULL) {
459f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
460f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
461f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
462f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->addElement(bucket, errorCode);
463f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
464f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
465f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString temp;
466f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
467f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // fix up the list, adding underflow, additions, overflow
468f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Insert inflow labels as needed.
469f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t scriptIndex = -1;
470f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *scriptUpperBoundary = &emptyString_;
471f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < indexCharacters.size(); ++i) {
472f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeString &current = *getString(indexCharacters, i);
473f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
474f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // We crossed the script boundary into a new script.
475f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            const UnicodeString &inflowBoundary = *scriptUpperBoundary;
476f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UBool skippedScript = FALSE;
477f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            for (;;) {
478f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
479f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
480f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
481f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
482f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                skippedScript = TRUE;
483f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
484f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (skippedScript && bucketList->size() > 1) {
485f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                // We are skipping one or more scripts,
486f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                // and we are not just getting out of the underflow label.
487f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
488f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (bucket == NULL) {
489f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    errorCode = U_MEMORY_ALLOCATION_ERROR;
490f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    return NULL;
491f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
492f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucketList->addElement(bucket, errorCode);
493f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
494f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
495f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Add a bucket with the current label.
496f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
497f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket == NULL) {
498f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
499f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
500f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
501f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList->addElement(bucket, errorCode);
502f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Remember ASCII and Pinyin buckets for Pinyin redirects.
503f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UChar c;
504f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
505f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            asciiBuckets[c - 0x41] = bucket;
506f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
507f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
508f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            pinyinBuckets[c - 0x41] = bucket;
509f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            hasPinyin = TRUE;
510f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
511f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Check for multiple primary weights.
512f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (!current.startsWith(BASE, BASE_LENGTH) &&
513f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                hasMultiplePrimaryWeights(*cei, variableTop, current, errorCode) &&
514f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
515f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // "AE-ligature" or "Sch" etc.
516f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            for (int32_t i = bucketList->size() - 2;; --i) {
517f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                Bucket *singleBucket = getBucket(*bucketList, i);
518f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
519f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // There is no single-character bucket since the last
520f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // underflow or inflow label.
521f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
522f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
523f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (singleBucket->displayBucket_ == NULL &&
524f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        !hasMultiplePrimaryWeights(
525f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                            *cei, variableTop, singleBucket->lowerBoundary_, errorCode)) {
526f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // Add an invisible bucket that redirects strings greater than the expansion
527f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // to the previous single-character bucket.
528f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // For example, after ... Q R S Sch we add Sch\uFFFF->S
529f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
530f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucket = new Bucket(emptyString_,
531f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        UnicodeString(current).append((UChar)0xFFFF),
532f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        U_ALPHAINDEX_NORMAL);
533f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    if (bucket == NULL) {
534f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        errorCode = U_MEMORY_ALLOCATION_ERROR;
535f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        return NULL;
536f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    }
537f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucket->displayBucket_ = singleBucket;
538f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucketList->addElement(bucket, errorCode);
539f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    hasInvisibleBuckets = TRUE;
540f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
541f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
542f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
543f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
544f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
545f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
546f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketList->size() == 1) {
547f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // No real labels, show only the underflow label.
548f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
549f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bl == NULL) {
550f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
551f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
552f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
553f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList.orphan();
554f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bl;
555f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
556f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // overflow bucket
557f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
558f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucket == NULL) {
559f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
560f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
561f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
562f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->addElement(bucket, errorCode); // final
563f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
564f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (hasPinyin) {
565f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Redirect Pinyin buckets.
566f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Bucket *asciiBucket = NULL;
567f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < 26; ++i) {
568f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (asciiBuckets[i] != NULL) {
569f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                asciiBucket = asciiBuckets[i];
570f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
571f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
572f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                pinyinBuckets[i]->displayBucket_ = asciiBucket;
573f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                hasInvisibleBuckets = TRUE;
574f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
575f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
576f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
577f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
578f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
579f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (!hasInvisibleBuckets) {
580f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
581f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bl == NULL) {
582f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
583f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
584f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
585f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList.orphan();
586f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bl;
587f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
588f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Merge inflow buckets that are visually adjacent.
589f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Iterate backwards: Merge inflow into overflow rather than the other way around.
590f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t i = bucketList->size() - 1;
591f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *nextBucket = getBucket(*bucketList, i);
592f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (--i > 0) {
593f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = getBucket(*bucketList, i);
594f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
595f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            continue;  // skip invisible buckets
596f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
597f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
598f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
599f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucket->displayBucket_ = nextBucket;
600f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                continue;
601f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
602f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
603f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = bucket;
604f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
605f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
606f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<UVector> publicBucketList(new UVector(errorCode));
607f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketList.isNull()) {
608f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
609f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
610f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
611f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Do not call publicBucketList->setDeleter():
612f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This vector shares its objects with the bucketList.
613f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < bucketList->size(); ++i) {
614f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = getBucket(*bucketList, i);
615f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ == NULL) {
616f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            publicBucketList->addElement(bucket, errorCode);
617f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
618f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
619f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
620f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
621f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bl == NULL) {
622f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
623f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
624f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
625f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList.orphan();
626f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    publicBucketList.orphan();
627f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return bl;
628f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
629f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
630f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
631f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Creates an index, and buckets and sorts the list of records into the index.
632f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
633f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
634f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode) || buckets_ != NULL) {
635f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return;
636f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
637f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    buckets_ = createBucketList(errorCode);
638f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
639b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
640b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
641b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
642f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Sort the records by name.
643f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Stable sort preserves input order of collation duplicates.
644f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
645f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
646f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Now, we traverse all of the input, which is now sorted.
647f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // If the item doesn't go in the current bucket, we find the next bucket that contains it.
648f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This makes the process order n*log(n), since we just sort the list and then do a linear process.
649f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
650f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // we need to improve it for that case.
651f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
652f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
653f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t bucketIndex = 1;
654f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *nextBucket;
655f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *upperBoundary;
656f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketIndex < buckets_->bucketList_->size()) {
657f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
658f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        upperBoundary = &nextBucket->lowerBoundary_;
659f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
660f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = NULL;
661f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        upperBoundary = NULL;
662f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
663f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < inputList_->size(); ++i) {
664f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Record *r = getRecord(*inputList_, i);
665f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // if the current bucket isn't the right one, find the one that is
666f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // We have a special flag for the last bucket so that we don't look any further
667f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while (upperBoundary != NULL &&
668f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
669f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            currentBucket = nextBucket;
670f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // now reset the boundary that we compare against
671f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (bucketIndex < buckets_->bucketList_->size()) {
672f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
673f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                upperBoundary = &nextBucket->lowerBoundary_;
674b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
675f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                upperBoundary = NULL;
676f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
677f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
678f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // now put the record into the bucket.
679f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Bucket *bucket = currentBucket;
680f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
681f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket = bucket->displayBucket_;
682f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
683f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->records_ == NULL) {
684f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket->records_ = new UVector(errorCode);
685f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (bucket->records_ == NULL) {
686f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
687f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return;
688b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
690f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket->records_->addElement(r, errorCode);
691b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
692f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
693b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
694f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::clearBuckets() {
695f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ != NULL) {
696f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        delete buckets_;
697f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        buckets_ = NULL;
698f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        internalResetBucketIterator();
699f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
700f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
701f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
702f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::internalResetBucketIterator() {
703f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    labelsIterIndex_ = -1;
704f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    currentBucket_ = NULL;
705b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
707b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
708f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
709f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return; }
710f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Chinese index characters, which are specific to each of the several Chinese tailorings,
711f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // take precedence over the single locale data exemplar set per language.
712f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const char *language = locale.getLanguage();
713f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 ||
714f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            uprv_strcmp(language, "ko") == 0) {
715f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // TODO: This should be done regardless of the language, but it's expensive.
716f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // We should add a Collator function (can be @internal)
717f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // to enumerate just the contractions that start with a given code point or string.
718f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (addChineseIndexCharacters(status) || U_FAILURE(status)) {
719f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return;
720f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
721f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
722f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
723f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
726b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
727b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
728b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeSet exemplars;
729b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
730b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_SUCCESS(status)) {
731f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        initialLabels_->addAll(exemplars);
732b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
733b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
734b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
735b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
736f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // The locale data did not include explicit Index characters.
737b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Synthesize a set of them from the locale's standard exemplar characters.
738b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
739b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
740b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
741b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
742b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
743f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // question: should we add auxiliary exemplars?
744f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
745f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        exemplars.add(0x61, 0x7A);
746f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
747f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
748f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // cut down to small list
749f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        exemplars.remove(0xAC00, 0xD7A3).
750f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
751f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
752f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
753f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xD30C).add(0xD558);
754f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
755f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
756f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // cut down to small list
757f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // make use of the fact that Ethiopic is allocated in 8's, where
758f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // the base is 0 mod 8.
759f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeSet ethiopic(
760f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
761f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeSetIterator it(ethiopic);
762f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while (it.next() && !it.isString()) {
763f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if ((it.getCodepoint() & 0x7) != 0) {
764f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                exemplars.remove(it.getCodepoint());
765f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
766f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
767f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
768f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Upper-case any that aren't already so.
770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //   (We only do this for synthesized index characters.)
771b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeSetIterator it(exemplars);
772b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString upperC;
773b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while (it.next()) {
774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UnicodeString &exemplarC = it.getString();
775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        upperC = exemplarC;
776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        upperC.toUpper(locale);
777f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        initialLabels_->add(upperC);
778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
779f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
781f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
782f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSet contractions;
783f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    ucol_getContractionsAndExpansions(collatorPrimaryOnly_->getUCollator(),
784f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                      contractions.toUSet(), NULL, FALSE, &errorCode);
785f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return FALSE; }
786f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString firstHanBoundary;
787f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool hasPinyin = FALSE;
788f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSetIterator iter(contractions);
789f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (iter.next()) {
790f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString &s = iter.getString();
791f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (s.startsWith(BASE, BASE_LENGTH)) {
792f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            initialLabels_->add(s);
793f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (firstHanBoundary.isEmpty() ||
794f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    collatorPrimaryOnly_->compare(s, firstHanBoundary, errorCode) < 0) {
795f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                firstHanBoundary = s;
796f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
797f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UChar c = s.charAt(s.length() - 1);
798f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (0x41 <= c && c <= 0x5A) {  // A-Z
799f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                hasPinyin = TRUE;
800f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
801f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
803f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (hasPinyin) {
804f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        initialLabels_->add(0x41, 0x5A);  // A-Z
805f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
806f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (!firstHanBoundary.isEmpty()) {
807f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // The hardcoded list of script boundaries includes U+4E00
808f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // which is tailored to not be the first primary
809f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // in all Chinese tailorings except "unihan".
810f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Replace U+4E00 with the first boundary string from the tailoring.
811f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // TODO: This becomes obsolete when the root collator gets
812f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // reliable script-first-primary mappings.
813f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t hanIndex = binarySearch(
814f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                *firstCharsInScripts_, UnicodeString((UChar)0x4E00), *collatorPrimaryOnly_);
815f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (hanIndex >= 0) {
816f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UnicodeString *fh = new UnicodeString(firstHanBoundary);
817f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (fh == NULL) {
818f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
819f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return FALSE;
820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
821f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            firstCharsInScripts_->setElementAt(fh, hanIndex);
822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
823f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return TRUE;
824f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
825f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return FALSE;
826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
827b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
829b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
830b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
832b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
833f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar CGJ = 0x034F;
834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString result;
836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (item.length() == 0) {
837b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return result;
838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
839b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t i = 0;
840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for (;;) {
841b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UChar32  cp = item.char32At(i);
842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.append(cp);
843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        i = item.moveIndex32(i, 1);
844b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (i >= item.length()) {
845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
846b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
847b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.append(CGJ);
848b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
849b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return result;
850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
851b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return FALSE;
855b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return FALSE;
860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst RuleBasedCollator &AlphabeticIndex::getCollator() const {
864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // There are no known non-RuleBasedCollator collators, and none ever expected.
865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // But, in case that changes, better a null pointer than a wrong type.
866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *dynamic_cast<RuleBasedCollator *>(collator_);
867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
870b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getInflowLabel() const {
871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return inflowLabel_;
872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
874b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getOverflowLabel() const {
875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return overflowLabel_;
876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
879b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
880b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return underflowLabel_;
881b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
882b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
883b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
884b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
885b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inflowLabel_ = label;
886f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
887b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
889b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
890b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
891b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
892b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    overflowLabel_ = label;
893f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
894b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
896b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
897b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
898b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
899b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    underflowLabel_ = label;
900f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
901b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
902b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
903b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
904b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
905b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getMaxLabelCount() const {
906b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return maxLabelCount_;
907b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
908b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
909b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
910b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
911b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
912b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
913b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
914b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (maxLabelCount <= 0) {
915b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
918b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    maxLabelCount_ = maxLabelCount;
919f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
922b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
923b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
924b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
925b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  init() - Common code for constructors.
926b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
927b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
928f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
929f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return; }
930f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (locale == NULL && collator_ == NULL) {
931f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_ILLEGAL_ARGUMENT_ERROR;
932b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
933b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
934f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
935b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    initialLabels_         = new UnicodeSet();
936f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (initialLabels_ == NULL) {
937f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
938f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return;
939f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
940b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
941f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
942b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    overflowLabel_ = inflowLabel_;
943b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    underflowLabel_ = inflowLabel_;
944b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
945f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collator_ == NULL) {
946f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        collator_ = static_cast<RuleBasedCollator *>(Collator::createInstance(*locale, status));
947f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (U_FAILURE(status)) { return; }
948f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collator_ == NULL) {
949f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
950f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return;
951f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
952f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
953f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
954f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collatorPrimaryOnly_ == NULL) {
955f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
956b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
957b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
958f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
959f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    firstCharsInScripts_ = firstStringsInScript(status);
960f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return; }
961f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
962f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString _4E00((UChar)0x4E00);
963f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString _1100((UChar)0x1100);
964f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString _1112((UChar)0x1112);
965f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collatorPrimaryOnly_->compare(_4E00, _1112, status) <= 0 &&
966f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            collatorPrimaryOnly_->compare(_1100, _4E00, status) <= 0) {
967f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // The standard Korean tailoring sorts Hanja (Han characters)
968f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // as secondary differences from Hangul syllables.
969f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // This makes U+4E00 not useful as a Han-script boundary.
970f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // TODO: This becomes obsolete when the root collator gets
971f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // reliable script-first-primary mappings.
972f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t hanIndex = binarySearch(
973f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                *firstCharsInScripts_, _4E00, *collatorPrimaryOnly_);
974f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (hanIndex >= 0) {
975f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            firstCharsInScripts_->removeElementAt(hanIndex);
976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
977f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
978f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Guard against a degenerate collator where
979f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // some script boundary strings are primary ignorable.
980f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (;;) {
981f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (U_FAILURE(status)) { return; }
982f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (firstCharsInScripts_->isEmpty()) {
983f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // AlphabeticIndex requires some non-ignorable script boundary strings.
984f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_ILLEGAL_ARGUMENT_ERROR;
985f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return;
986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
987f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(
988f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
989f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                emptyString_, status) == UCOL_EQUAL) {
990f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            firstCharsInScripts_->removeElementAt(0);
991f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
992f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            break;
993b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
994b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
995b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
996f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (locale != NULL) {
997f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        addIndexExemplars(*locale, status);
998b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
999b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1000b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1001b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1002b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1003b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Comparison function for UVector<UnicodeString *> sorting with a collator.
1004b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1005b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
1006f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right) {
1007103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *leftElement = static_cast<const UElement *>(left);
1008103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *rightElement = static_cast<const UElement *>(right);
1009103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
1010103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
1011b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1012b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (leftString == rightString) {
1013b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Catches case where both are NULL
1014b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
1015b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1016b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (leftString == NULL) {
1017b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 1;
1018b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
1019b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (rightString == NULL) {
1020b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return -1;
1021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1022f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const Collator *col = static_cast<const Collator *>(context);
1023f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode errorCode = U_ZERO_ERROR;
1024f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return col->compare(*leftString, *rightString, errorCode);
1025b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1027b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1028b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Comparison function for UVector<Record *> sorting with a collator.
1029b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1030b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
1031b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right) {
1032103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *leftElement = static_cast<const UElement *>(left);
1033103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *rightElement = static_cast<const UElement *>(right);
1034103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
1035103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
1036b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const Collator *col = static_cast<const Collator *>(context);
1037f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode errorCode = U_ZERO_ERROR;
1038f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return col->compare(leftRec->name_, rightRec->name_, errorCode);
1039b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1040b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1041b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1042f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
1043f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * This list contains one character per script that has the
1044f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * lowest primary weight for that script in the root collator.
1045f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * This list will be copied and sorted to account for script reordering.
1046f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *
1047f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * <p>TODO: This is fragile. If the first character of a script is tailored
1048f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * so that it does not map to the script's lowest primary weight any more,
1049f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * then the buckets will be off.
1050f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * There are hacks in the code to handle the known CJK tailorings of U+4E00.
1051f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *
1052f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * <p>We use "A" not "a" because the en_US_POSIX tailoring sorts A primary-before a.
1053f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *
1054f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Keep this in sync with HACK_FIRST_CHARS_IN_SCRIPTS in
1055f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * ICU4J main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
1056f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
1057f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar HACK_FIRST_CHARS_IN_SCRIPTS[] =  {
1058f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x41, 0, 0x03B1, 0,
1059f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x2C81, 0, 0x0430, 0, 0x2C30, 0, 0x10D0, 0, 0x0561, 0, 0x05D0, 0, 0xD802, 0xDD00, 0, 0x0800, 0, 0x0621, 0, 0x0710, 0,
1060f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x0780, 0, 0x07CA, 0, 0x2D30, 0, 0x1200, 0, 0x0950, 0, 0x0985, 0, 0x0A74, 0, 0x0AD0, 0, 0x0B05, 0, 0x0BD0, 0,
1061f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x0C05, 0, 0x0C85, 0, 0x0D05, 0, 0x0D85, 0,
1062f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xAAF2, 0,  // Meetei Mayek
1063f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xA800, 0, 0xA882, 0, 0xD804, 0xDC83, 0,
1064f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    U16_LEAD(0x111C4), U16_TRAIL(0x111C4), 0,  // Sharada
1065f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    U16_LEAD(0x11680), U16_TRAIL(0x11680), 0,  // Takri
1066f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x1B83, 0,
1067f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xD802, 0xDE00, 0, 0x0E01, 0,
1068f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x0EDE, 0,  // Lao
1069f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xAA80, 0, 0x0F40, 0, 0x1C00, 0, 0xA840, 0, 0x1900, 0, 0x1700, 0, 0x1720, 0,
1070f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x1740, 0, 0x1760, 0, 0x1A00, 0, 0xA930, 0, 0xA90A, 0, 0x1000, 0,
1071f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    U16_LEAD(0x11103), U16_TRAIL(0x11103), 0,  // Chakma
1072f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x1780, 0, 0x1950, 0, 0x1980, 0, 0x1A20, 0,
1073f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xAA00, 0, 0x1B05, 0, 0xA984, 0, 0x1880, 0, 0x1C5A, 0, 0x13A0, 0, 0x1401, 0, 0x1681, 0, 0x16A0, 0, 0xD803, 0xDC00, 0,
1074f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xA500, 0, 0xA6A0, 0, 0x1100, 0, 0x3041, 0, 0x30A1, 0, 0x3105, 0, 0xA000, 0, 0xA4F8, 0,
1075f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    U16_LEAD(0x16F00), U16_TRAIL(0x16F00), 0,  // Miao
1076f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xD800, 0xDE80, 0,
1077f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xD800, 0xDEA0, 0, 0xD802, 0xDD20, 0, 0xD800, 0xDF00, 0, 0xD800, 0xDF30, 0, 0xD801, 0xDC28, 0, 0xD801, 0xDC50, 0,
1078f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xD801, 0xDC80, 0,
1079f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    U16_LEAD(0x110D0), U16_TRAIL(0x110D0), 0,  // Sora Sompeng
1080f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xD800, 0xDC00, 0, 0xD802, 0xDC00, 0, 0xD802, 0xDE60, 0, 0xD802, 0xDF00, 0, 0xD802, 0xDC40, 0,
1081f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xD802, 0xDF40, 0, 0xD802, 0xDF60, 0, 0xD800, 0xDF80, 0, 0xD800, 0xDFA0, 0, 0xD808, 0xDC00, 0, 0xD80C, 0xDC00, 0,
1082f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    U16_LEAD(0x109A0), U16_TRAIL(0x109A0), 0,  // Meroitic Cursive
1083f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    U16_LEAD(0x10980), U16_TRAIL(0x10980), 0,  // Meroitic Hieroglyphs
1084f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0x4E00, 0,
1085f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // TODO: The overflow bucket's lowerBoundary string should be the
1086f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // first item after the last reordering group in the collator's script order.
1087f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This should normally be the first Unicode code point
1088f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // that is unassigned (U+0378 in Unicode 6.3) and untailored.
1089f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // However, at least up to ICU 51 the Hani reordering group includes
1090f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // unassigned code points,
1091f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // and there is no stable string for the start of the trailing-weights range.
1092f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // The only known string that sorts "high" is U+FFFF.
1093f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // When ICU separates Hani vs. unassigned reordering groups, we need to fix this,
1094f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // and fix relevant test code.
1095f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Ideally, FractionalUCA.txt will have a "script first primary"
1096f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // for unassigned code points.
1097f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    0xFFFF, 0
1098f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius};
1099b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
1101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
1103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UVector *dest = new UVector(status);
1105103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if (dest == NULL) {
1106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
1107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
1108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1109103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    dest->setDeleter(uprv_deleteUObject);
1110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *src  = HACK_FIRST_CHARS_IN_SCRIPTS;
1111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UChar *limit = src + LENGTHOF(HACK_FIRST_CHARS_IN_SCRIPTS);
1112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
1113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (U_FAILURE(status)) {
1114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return dest;
1115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
1116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UnicodeString *str = new UnicodeString(src, -1);
1117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (str == NULL) {
1118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            status = U_MEMORY_ALLOCATION_ERROR;
1119f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return dest;
1120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
1121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        dest->addElement(str, status);
1122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        src += str->length() + 1;
1123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while (src < limit);
1124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return dest;
1125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
1129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
1131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Returns true if one index character string is "better" than the other.
1132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1133f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * better, and otherwise binary-less-than is better.
1134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
1135f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1136f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                const UnicodeString &one, const UnicodeString &other) {
1137f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This is called with primary-equal strings, but never with one.equals(other).
1138f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode status = U_ZERO_ERROR;
1139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1140f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1141f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return FALSE; }
1142f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t result = n1.countChar32() - n2.countChar32();
1143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (result != 0) {
1144f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return result < 0;
1145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    result = n1.compareCodePointOrder(n2);
1147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (result != 0) {
1148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return result < 0;
1149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return one.compareCodePointOrder(other) < 0;
1151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
1154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Constructor & Destructor for AlphabeticIndex::Record
1157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//     Records are internal only, instances are not directly surfaced in the public API.
1159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//     This class is mostly struct-like, with all public fields.
1160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : name_(name), data_(data) {}
1163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
1164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Record::~Record() {
1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
1171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1172f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (inputList_ == NULL) {
1173f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_ = new UVector(status);
1174f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (inputList_ == NULL) {
1175f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
1176f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return *this;
1177f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
1178f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_->setDeleter(alphaIndex_deleteRecord);
1179f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Record *r = new Record(name, data);
1181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (r == NULL) {
1182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
1183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return *this;
1184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inputList_->addElement(r, status);
1186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
1187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::string ss;
1188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::string ss2;
1189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1197f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_->removeAllElements();
1198f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        clearBuckets();
1199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1204f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
1205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
1207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex() const {
1213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return labelsIterIndex_;
1214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ == NULL && currentBucket_ != NULL) {
1222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ENUM_OUT_OF_SYNC_ERROR;
1223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1225f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
1226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ++labelsIterIndex_;
1230f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1231f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        labelsIterIndex_ = buckets_->getBucketCount();
1232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1234f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    resetRecordIterator();
1236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return TRUE;
1237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getBucketLabel() const {
1240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ != NULL) {
1241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->label_;
1242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1243f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return emptyString_;
1244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ != NULL) {
1250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->labelType_;
1251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return U_ALPHAINDEX_NORMAL;
1253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketRecordCount() const {
1258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->records_->size();
1260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
1262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
1269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    internalResetBucketIterator();
1271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ == NULL) {
1280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // We are trying to iterate over the items in a bucket, but there is no
1281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // current bucket from the enumeration of buckets.
1282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_INVALID_STATE_ERROR;
1283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1285f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ == NULL) {
1286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ENUM_OUT_OF_SYNC_ERROR;
1287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1289f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_->records_ == NULL) {
1290f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return FALSE;
1291f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ++itemsIterIndex_;
1293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_  = currentBucket_->records_->size();
1295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return TRUE;
1298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getRecordName() const {
1302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *retStr = &emptyString_;
1303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ >= 0 &&
1305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ < currentBucket_->records_->size()) {
1306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            retStr = &item->name_;
1308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *retStr;
1310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst void *AlphabeticIndex::getRecordData() const {
1313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const void *retPtr = NULL;
1314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ >= 0 &&
1316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ < currentBucket_->records_->size()) {
1317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            retPtr = item->data_;
1319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return retPtr;
1321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    itemsIterIndex_ = -1;
1326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                const UnicodeString &lowerBoundary,
1333f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                UAlphabeticIndexLabelType type)
1334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          displayBucket_(NULL), displayIndex_(-1),
1336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          records_(NULL) {
1337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::~Bucket() {
1341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete records_;
1342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
1345103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
1346103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif
1347