1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2009-2014, International Business Machines Corporation and
4f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* others. All Rights Reserved.
5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
11103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/alphaindex.h"
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/coll.h"
14f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/localpointer.h"
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/normalizer2.h"
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/tblcoll.h"
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uchar.h"
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ulocdata.h"
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uniset.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h"
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/usetiter.h"
22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
24f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "cmemory.h"
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cstring.h"
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h"
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uvector.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h"
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//#include <string>
31103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//#include <iostream>
32f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
35f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
37f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
38f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Prefix string for Chinese index buckets.
39f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
40f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
41f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UChar BASE[1] = { 0xFDD0 };
42f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst int32_t BASE_LENGTH = 1;
43f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
44f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
45f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                const UnicodeString &one, const UnicodeString &other);
46f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
47f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
50f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right);
51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right);
54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  UVector<Record *> support function, delete a Record.
56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void U_CALLCONV
57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoalphaIndex_deleteRecord(void *obj) {
58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete static_cast<AlphabeticIndex::Record *>(obj);
59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
61f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
63f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
64f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                           UErrorCode &errorCode) {
65f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
66f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (owned.isValid()) {
67f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return owned.orphan();
68f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
69f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString *p = new UnicodeString(s);
70f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (p == NULL) {
71f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
72f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
73f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return p;
74f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
76f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline UnicodeString *getString(const UVector &list, int32_t i) {
77f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<UnicodeString *>(list[i]);
78f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
80f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
81f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<AlphabeticIndex::Bucket *>(list[i]);
82f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
84f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
85f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<AlphabeticIndex::Record *>(list[i]);
86f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
87f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
88f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
89f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Like Java Collections.binarySearch(List, String, Comparator).
90f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *
91f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * @return the index>=0 where the item was found,
92f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *         or the index<0 for inserting the string at ~index in sorted order
93f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
94f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
95f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (list.size() == 0) { return ~0; }
96f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t start = 0;
97f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t limit = list.size();
98f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (;;) {
99f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t i = (start + limit) / 2;
100f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UErrorCode errorCode = U_ZERO_ERROR;
102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UCollationResult cmp = coll.compare(s, *si, errorCode);
103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (cmp == UCOL_EQUAL) {
104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return i;
105f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (cmp < 0) {
106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (i == start) {
107f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return ~start;  // insert s before *si
108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            limit = i;
110f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (i == start) {
112f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return ~(start + 1);  // insert s after *si
113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
114f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            start = i;
115f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
119f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
120f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// The BucketList is not in the anonymous namespace because only Clang
122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// seems to support its use in other classes from there.
123f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// However, we also don't need U_I18N_API because it is not used from outside the i18n library.
124f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusclass BucketList : public UObject {
125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliuspublic:
126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    BucketList(UVector *bucketList, UVector *publicBucketList)
127f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t displayIndex = 0;
129f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < publicBucketList->size(); ++i) {
130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // The virtual destructor must not be inline.
1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // See ticket #8454 for details.
1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    virtual ~BucketList();
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
138f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t getBucketCount() const {
139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return immutableVisibleList_->size();
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
141f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
142f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                           UErrorCode &errorCode) {
144f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // binary search
145f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t start = 0;
146f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t limit = bucketList_->size();
147f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while ((start + 1) < limit) {
148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t i = (start + limit) / 2;
149f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UCollationResult nameVsBucket =
151f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
152f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (nameVsBucket < 0) {
153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                limit = i;
154f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            } else {
155f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                start = i;
156f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
157f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
159f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
160f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket = bucket->displayBucket_;
161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bucket->displayIndex_;
163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
164f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
165f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    /** All of the buckets, visible and invisible. */
166f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector *bucketList_;
167f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    /** Just the visible buckets. */
168f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector *immutableVisibleList_;
169f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius};
170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
1718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusBucketList::~BucketList() {
1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete bucketList_;
1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (immutableVisibleList_ != bucketList_) {
1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        delete immutableVisibleList_;
1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
178f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
179f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete buckets_;
180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete collatorPrimaryOnly_;
181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t
184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketCount() const {
185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketCount();
186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
187f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
188f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t
189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketIndex(
190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString &name, UErrorCode &errorCode) const {
191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
192f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
193f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst AlphabeticIndex::Bucket *
195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (0 <= index && index < buckets_->getBucketCount()) {
197f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return icu::getBucket(*buckets_->immutableVisibleList_, index);
198f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
199f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
203f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
204f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : inputList_(NULL),
205f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
206f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          maxLabelCount_(99),
207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          initialLabels_(NULL), firstCharsInScripts_(NULL),
208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          collator_(NULL), collatorPrimaryOnly_(NULL),
209f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          buckets_(NULL) {
210f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    init(&locale, status);
211f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
212f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
213f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
215f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : inputList_(NULL),
216f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
217f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          maxLabelCount_(99),
218f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          initialLabels_(NULL), firstCharsInScripts_(NULL),
219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          collator_(collator), collatorPrimaryOnly_(NULL),
220f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          buckets_(NULL) {
221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    init(NULL, status);
222f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
223f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
224f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::~AlphabeticIndex() {
227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete collator_;
228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete collatorPrimaryOnly_;
229f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete firstCharsInScripts_;
230f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete buckets_;
231f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete inputList_;
232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete initialLabels_;
233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    initialLabels_->addAll(additions);
241f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    addIndexExemplars(locale, status);
248f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
253f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
254f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
255f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // In C++, the ImmutableIndex must own its copy of the BucketList,
256f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // even if it contains no records, for proper memory management.
257f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We could clone the buckets_ if they are not NULL,
258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // but that would be worth it only if this method is called multiple times,
259f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // or called after using the old-style bucket iterator API.
260f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
261f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<RuleBasedCollator> coll(
262f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
263f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (immutableBucketList.isNull() || coll.isNull()) {
264f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
265f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
266f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
267f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
268f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (immIndex == NULL) {
269f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
271f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
272f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // The ImmutableIndex adopted its parameter objects.
273f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    immutableBucketList.orphan();
274f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    coll.orphan();
275f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return immIndex;
276f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
277f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
279f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
283f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketCount();
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
288f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status) || inputList_ == NULL) {
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
291f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return inputList_->size();
292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
294f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
295f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
296f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return; }
297f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
298f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
299f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString &overflowBoundary =
300f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
301f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We make a sorted array of elements.
303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Some of the input may be redundant.
304f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
305f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We filter out those cases.
306f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSetIterator iter(*initialLabels_);
307f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (iter.next()) {
308f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString *item = &iter.getString();
309f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        LocalPointer<UnicodeString> ownedItem;
310f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UBool checkDistinct;
311f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t itemLength = item->length();
312f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (!item->hasMoreChar32Than(0, itemLength, 1)) {
313f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = FALSE;
314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
315f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                item->charAt(itemLength - 2) != 0x2a) {
316f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Use a label if it is marked with one trailing star,
317f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // even if the label string sorts the same when all contractions are suppressed.
318f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
319f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            item = ownedItem.getAlias();
320f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (item == NULL) {
321f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
322f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return;
323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
324f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = FALSE;
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
326f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = TRUE;
327f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
328f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
329f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Ignore a primary-ignorable or non-alphabetic index character.
330f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Ignore an index character that will land in the overflow bucket.
332f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (checkDistinct &&
333f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Ignore a multi-code point index character that does not sort distinctly
335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // from the sequence of its separate characters.
336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
337f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
338f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (insertionPoint < 0) {
339f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                indexCharacters.insertElementAt(
340f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
341f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            } else {
342f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
343f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
344f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    indexCharacters.setElementAt(
345f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        ownedString(*item, ownedItem, errorCode), insertionPoint);
346f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
347f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
350f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return; }
351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
352f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
354f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t size = indexCharacters.size() - 1;
355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (size > maxLabelCount_) {
356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t count = 0;
357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t old = -1;
358f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < indexCharacters.size();) {
359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            ++count;
360f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t bump = count * maxLabelCount_ / size;
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (bump == old) {
362f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                indexCharacters.removeElementAt(i);
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                old = bump;
365f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                ++i;
366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
369f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
371f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
372f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
373f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
374f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (!current.startsWith(BASE, BASE_LENGTH)) {
375f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return current;
376f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
377f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UChar rest = current.charAt(BASE_LENGTH);
378f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (0x2800 < rest && rest <= 0x28FF) { // stroke count
379f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t count = rest-0x2800;
380f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        temp.setTo((UChar)(0x30 + count % 10));
381f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (count >= 10) {
382f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            count /= 10;
383f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            temp.insert(0, (UChar)(0x30 + count % 10));
384f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (count >= 10) {
385f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                count /= 10;
386f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                temp.insert(0, (UChar)(0x30 + count));
387f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
388f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
389f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return temp.append((UChar)0x5283);
390f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
391f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return temp.setTo(current, BASE_LENGTH);
392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
394f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool hasMultiplePrimaryWeights(
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const RuleBasedCollator &coll, uint32_t variableTop,
396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ces.removeAllElements();
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll.internalGetCEs(s, ces, errorCode);
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(errorCode)) { return FALSE; }
400f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool seenPrimary = FALSE;
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for (int32_t i = 0; i < ces.size(); ++i) {
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int64_t ce = ces.elementAti(i);
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t p = (uint32_t)(ce >> 32);
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (p > variableTop) {
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // not primary ignorable
406f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (seenPrimary) {
407f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return TRUE;
408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
409f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            seenPrimary = TRUE;
410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
411f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
412f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return FALSE;
413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
415f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
417f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusBucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
418f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Initialize indexCharacters.
419f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector indexCharacters(errorCode);
420f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    indexCharacters.setDeleter(uprv_deleteUObject);
421f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initLabels(indexCharacters, errorCode);
422f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
423f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
424f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Variables for hasMultiplePrimaryWeights().
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector64 ces(errorCode);
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t variableTop;
427f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
429f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
430f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        variableTop = 0;
431f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
432f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool hasInvisibleBuckets = FALSE;
433f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
434f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Helper arrays for Chinese Pinyin collation.
435f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *asciiBuckets[26] = {
436f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
437f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
438f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    };
439f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *pinyinBuckets[26] = {
440f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
441f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
442f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    };
443f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool hasPinyin = FALSE;
444f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
4451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
4461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(errorCode)) {
447f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
448f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
449f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->setDeleter(uprv_deleteUObject);
450f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
451f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // underflow bucket
452f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
453f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucket == NULL) {
454f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
455f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
456f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
457f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->addElement(bucket, errorCode);
458f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
459f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
460f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString temp;
461f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
462f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // fix up the list, adding underflow, additions, overflow
463f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Insert inflow labels as needed.
464f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t scriptIndex = -1;
465f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *scriptUpperBoundary = &emptyString_;
466f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < indexCharacters.size(); ++i) {
467f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeString &current = *getString(indexCharacters, i);
468f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
469f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // We crossed the script boundary into a new script.
470f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            const UnicodeString &inflowBoundary = *scriptUpperBoundary;
471f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UBool skippedScript = FALSE;
472f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            for (;;) {
473f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
474f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
475f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
476f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
477f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                skippedScript = TRUE;
478f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
479f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (skippedScript && bucketList->size() > 1) {
480f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                // We are skipping one or more scripts,
481f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                // and we are not just getting out of the underflow label.
482f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
483f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (bucket == NULL) {
484f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    errorCode = U_MEMORY_ALLOCATION_ERROR;
485f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    return NULL;
486f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
487f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucketList->addElement(bucket, errorCode);
488f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
489f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
490f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Add a bucket with the current label.
491f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
492f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket == NULL) {
493f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
494f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
495f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
496f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList->addElement(bucket, errorCode);
497f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Remember ASCII and Pinyin buckets for Pinyin redirects.
498f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UChar c;
499f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
500f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            asciiBuckets[c - 0x41] = bucket;
501f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
502f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
503f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            pinyinBuckets[c - 0x41] = bucket;
504f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            hasPinyin = TRUE;
505f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
506f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Check for multiple primary weights.
507f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (!current.startsWith(BASE, BASE_LENGTH) &&
508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          ces, errorCode) &&
510f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
511f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // "AE-ligature" or "Sch" etc.
512f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            for (int32_t i = bucketList->size() - 2;; --i) {
513f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                Bucket *singleBucket = getBucket(*bucketList, i);
514f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
515f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // There is no single-character bucket since the last
516f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // underflow or inflow label.
517f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
518f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
519f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (singleBucket->displayBucket_ == NULL &&
520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                                   singleBucket->lowerBoundary_,
522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                                   ces, errorCode)) {
523f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // Add an invisible bucket that redirects strings greater than the expansion
524f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // to the previous single-character bucket.
525f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // For example, after ... Q R S Sch we add Sch\uFFFF->S
526f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
527f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucket = new Bucket(emptyString_,
528f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        UnicodeString(current).append((UChar)0xFFFF),
529f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        U_ALPHAINDEX_NORMAL);
530f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    if (bucket == NULL) {
531f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        errorCode = U_MEMORY_ALLOCATION_ERROR;
532f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        return NULL;
533f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    }
534f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucket->displayBucket_ = singleBucket;
535f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucketList->addElement(bucket, errorCode);
536f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    hasInvisibleBuckets = TRUE;
537f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
538f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
539f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
540f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
541f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
542f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
543f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketList->size() == 1) {
544f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // No real labels, show only the underflow label.
545f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
546f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bl == NULL) {
547f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
548f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
549f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
550f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList.orphan();
551f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bl;
552f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
553f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // overflow bucket
554f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
555f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucket == NULL) {
556f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
557f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
558f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
559f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->addElement(bucket, errorCode); // final
560f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
561f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (hasPinyin) {
562f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Redirect Pinyin buckets.
563f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Bucket *asciiBucket = NULL;
564f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < 26; ++i) {
565f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (asciiBuckets[i] != NULL) {
566f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                asciiBucket = asciiBuckets[i];
567f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
568f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
569f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                pinyinBuckets[i]->displayBucket_ = asciiBucket;
570f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                hasInvisibleBuckets = TRUE;
571f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
572f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
573f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
574f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
575f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
576f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (!hasInvisibleBuckets) {
577f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
578f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bl == NULL) {
579f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
580f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
581f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
582f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList.orphan();
583f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bl;
584f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
585f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Merge inflow buckets that are visually adjacent.
586f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Iterate backwards: Merge inflow into overflow rather than the other way around.
587f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t i = bucketList->size() - 1;
588f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *nextBucket = getBucket(*bucketList, i);
589f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (--i > 0) {
590f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = getBucket(*bucketList, i);
591f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
592f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            continue;  // skip invisible buckets
593f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
594f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
595f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
596f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucket->displayBucket_ = nextBucket;
597f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                continue;
598f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
599f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
600f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = bucket;
601f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
602f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
6031b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
6041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(errorCode)) {
605f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
606f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
607f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Do not call publicBucketList->setDeleter():
608f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This vector shares its objects with the bucketList.
609f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < bucketList->size(); ++i) {
610f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = getBucket(*bucketList, i);
611f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ == NULL) {
612f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            publicBucketList->addElement(bucket, errorCode);
613f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
614f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
615f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
616f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
617f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bl == NULL) {
618f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
619f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
620f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
621f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList.orphan();
622f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    publicBucketList.orphan();
623f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return bl;
624f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
625f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
626f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
627f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Creates an index, and buckets and sorts the list of records into the index.
628f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
629f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
630f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode) || buckets_ != NULL) {
631f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return;
632f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
633f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    buckets_ = createBucketList(errorCode);
634f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
635b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
636b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
637b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
638f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Sort the records by name.
639f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Stable sort preserves input order of collation duplicates.
640f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
641f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
642f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Now, we traverse all of the input, which is now sorted.
643f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // If the item doesn't go in the current bucket, we find the next bucket that contains it.
644f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This makes the process order n*log(n), since we just sort the list and then do a linear process.
645f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
646f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // we need to improve it for that case.
647f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
648f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
649f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t bucketIndex = 1;
650f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *nextBucket;
651f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *upperBoundary;
652f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketIndex < buckets_->bucketList_->size()) {
653f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
654f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        upperBoundary = &nextBucket->lowerBoundary_;
655f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
656f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = NULL;
657f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        upperBoundary = NULL;
658f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
659f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < inputList_->size(); ++i) {
660f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Record *r = getRecord(*inputList_, i);
661f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // if the current bucket isn't the right one, find the one that is
662f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // We have a special flag for the last bucket so that we don't look any further
663f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while (upperBoundary != NULL &&
664f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
665f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            currentBucket = nextBucket;
666f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // now reset the boundary that we compare against
667f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (bucketIndex < buckets_->bucketList_->size()) {
668f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
669f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                upperBoundary = &nextBucket->lowerBoundary_;
670b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
671f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                upperBoundary = NULL;
672f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
673f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
674f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // now put the record into the bucket.
675f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Bucket *bucket = currentBucket;
676f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
677f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket = bucket->displayBucket_;
678f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
679f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->records_ == NULL) {
680f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket->records_ = new UVector(errorCode);
681f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (bucket->records_ == NULL) {
682f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
683f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return;
684b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
685b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
686f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket->records_->addElement(r, errorCode);
687b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
688f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
690f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::clearBuckets() {
691f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ != NULL) {
692f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        delete buckets_;
693f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        buckets_ = NULL;
694f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        internalResetBucketIterator();
695f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
696f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
697f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
698f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::internalResetBucketIterator() {
699f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    labelsIterIndex_ = -1;
700f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    currentBucket_ = NULL;
701b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
702b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
703b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
707b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
708b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
709b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
710b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeSet exemplars;
711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
712b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_SUCCESS(status)) {
713f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        initialLabels_->addAll(exemplars);
714b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
715b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
716b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
717b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
718f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // The locale data did not include explicit Index characters.
719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Synthesize a set of them from the locale's standard exemplar characters.
720b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
721b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
722b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
723b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
725f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // question: should we add auxiliary exemplars?
726f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
727f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        exemplars.add(0x61, 0x7A);
728f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
729f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
730f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // cut down to small list
731f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        exemplars.remove(0xAC00, 0xD7A3).
732f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
733f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
734f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
735f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xD30C).add(0xD558);
736f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
737f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
738f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // cut down to small list
739f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // make use of the fact that Ethiopic is allocated in 8's, where
740f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // the base is 0 mod 8.
741f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeSet ethiopic(
742f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
743f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeSetIterator it(ethiopic);
744f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while (it.next() && !it.isString()) {
745f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if ((it.getCodepoint() & 0x7) != 0) {
746f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                exemplars.remove(it.getCodepoint());
747f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
748f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
749f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
750f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Upper-case any that aren't already so.
752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //   (We only do this for synthesized index characters.)
753b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeSetIterator it(exemplars);
754b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString upperC;
755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while (it.next()) {
756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UnicodeString &exemplarC = it.getString();
757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        upperC = exemplarC;
758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        upperC.toUpper(locale);
759f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        initialLabels_->add(upperC);
760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
761f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
762b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
763f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
764f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSet contractions;
765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
766fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
767fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    initialLabels_->addAll(contractions);
768f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSetIterator iter(contractions);
769f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (iter.next()) {
770f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString &s = iter.getString();
771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar c = s.charAt(s.length() - 1);
773fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (0x41 <= c && c <= 0x5A) {  // A-Z
774fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // There are Pinyin labels, add ASCII A-Z labels as well.
775fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            initialLabels_->add(0x41, 0x5A);  // A-Z
776fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
782b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
783b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
785b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
786f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar CGJ = 0x034F;
787b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
788b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString result;
789b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (item.length() == 0) {
790b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return result;
791b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
792b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t i = 0;
793b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for (;;) {
794b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UChar32  cp = item.char32At(i);
795b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.append(cp);
796b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        i = item.moveIndex32(i, 1);
797b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (i >= item.length()) {
798b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
799b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
800b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.append(CGJ);
801b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return result;
803b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
804b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
805b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
806b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
807b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return FALSE;
808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
809b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
810b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
811b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
812b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return FALSE;
813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
814b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
815b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
816b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst RuleBasedCollator &AlphabeticIndex::getCollator() const {
817fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return *collator_;
818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
819b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
821b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getInflowLabel() const {
822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return inflowLabel_;
823b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
824b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
825b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getOverflowLabel() const {
826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return overflowLabel_;
827b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
829b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
830b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return underflowLabel_;
832b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
833b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inflowLabel_ = label;
837f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
839b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
841b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    overflowLabel_ = label;
844f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
846b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
847b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
848b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
849b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    underflowLabel_ = label;
851f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
855b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getMaxLabelCount() const {
857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return maxLabelCount_;
858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (maxLabelCount <= 0) {
866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    maxLabelCount_ = maxLabelCount;
870f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
874b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  init() - Common code for constructors.
877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
879f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
880f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return; }
881f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (locale == NULL && collator_ == NULL) {
882f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_ILLEGAL_ARGUMENT_ERROR;
883b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
884b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
885f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
886b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    initialLabels_         = new UnicodeSet();
887f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (initialLabels_ == NULL) {
888f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
889f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return;
890f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
891b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
892f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
893b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    overflowLabel_ = inflowLabel_;
894b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    underflowLabel_ = inflowLabel_;
895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
896f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collator_ == NULL) {
897fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        Collator *coll = Collator::createInstance(*locale, status);
898fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (U_FAILURE(status)) {
899fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            delete coll;
900fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
901fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
902fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (coll == NULL) {
903f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
904f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return;
905f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
906fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        collator_ = dynamic_cast<RuleBasedCollator *>(coll);
907fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (collator_ == NULL) {
908fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            delete coll;
909fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            status = U_UNSUPPORTED_ERROR;
910fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
911fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
912f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
913f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
914f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collatorPrimaryOnly_ == NULL) {
915f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
918f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
919f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    firstCharsInScripts_ = firstStringsInScript(status);
920f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return; }
921f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
922f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Guard against a degenerate collator where
923f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // some script boundary strings are primary ignorable.
924f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (;;) {
925f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (U_FAILURE(status)) { return; }
926f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (firstCharsInScripts_->isEmpty()) {
927f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // AlphabeticIndex requires some non-ignorable script boundary strings.
928f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_ILLEGAL_ARGUMENT_ERROR;
929f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return;
930b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
931f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(
932f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
933f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                emptyString_, status) == UCOL_EQUAL) {
934f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            firstCharsInScripts_->removeElementAt(0);
935f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
936f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            break;
937b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
938fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
939fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
940fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Chinese index characters, which are specific to each of the several Chinese tailorings,
941fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // take precedence over the single locale data exemplar set per language.
942fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (!addChineseIndexCharacters(status) && locale != NULL) {
943fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addIndexExemplars(*locale, status);
944b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
945b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
947b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
948b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Comparison function for UVector<UnicodeString *> sorting with a collator.
950b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
951b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
952f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right) {
953103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *leftElement = static_cast<const UElement *>(left);
954103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *rightElement = static_cast<const UElement *>(right);
955103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
956103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
957b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
958b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (leftString == rightString) {
959b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Catches case where both are NULL
960b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
961b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
962b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (leftString == NULL) {
963b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 1;
964b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
965b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (rightString == NULL) {
966b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return -1;
967b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
968f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const Collator *col = static_cast<const Collator *>(context);
969f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode errorCode = U_ZERO_ERROR;
970f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return col->compare(*leftString, *rightString, errorCode);
971b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
972b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
973b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
974b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Comparison function for UVector<Record *> sorting with a collator.
975b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
977b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right) {
978103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *leftElement = static_cast<const UElement *>(left);
979103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *rightElement = static_cast<const UElement *>(right);
980103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
981103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
982b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const Collator *col = static_cast<const Collator *>(context);
983f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode errorCode = U_ZERO_ERROR;
984f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return col->compare(leftRec->name_, rightRec->name_, errorCode);
985b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
987b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
988b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
989b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
990b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
9911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    LocalPointer<UVector> dest(new UVector(status), status);
9921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(status)) {
993b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
994b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
995103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    dest->setDeleter(uprv_deleteUObject);
996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Fetch the script-first-primary contractions which are defined in the root collator.
997fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // They all start with U+FDD1.
998fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet set;
999fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1000fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(status)) {
1001fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULL;
1002fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1003fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (set.isEmpty()) {
1004fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        status = U_UNSUPPORTED_ERROR;
1005fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULL;
1006fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1007fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSetIterator iter(set);
1008fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while (iter.next()) {
1009fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UnicodeString &boundary = iter.getString();
1010fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1011fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1012fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Ignore boundaries for the special reordering groups.
1013fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Take only those for "real scripts" (where the sample character is a Letter,
1014fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // and the one for unassigned implicit weights (Cn).
1015fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            continue;
1016b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
1017fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UnicodeString *s = new UnicodeString(boundary);
1018fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (s == NULL) {
1019b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            status = U_MEMORY_ALLOCATION_ERROR;
1020fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return NULL;
1021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
1022fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dest->addElement(s, status);
1023fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1024fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return dest.orphan();
1025b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1027b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1028f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
1029b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1030b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
1031f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Returns true if one index character string is "better" than the other.
1032f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1033f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * better, and otherwise binary-less-than is better.
1034b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
1035f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1036f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                const UnicodeString &one, const UnicodeString &other) {
1037f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This is called with primary-equal strings, but never with one.equals(other).
1038f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode status = U_ZERO_ERROR;
1039f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1040f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1041f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return FALSE; }
1042f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t result = n1.countChar32() - n2.countChar32();
1043b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (result != 0) {
1044f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return result < 0;
1045b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1046b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    result = n1.compareCodePointOrder(n2);
1047b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (result != 0) {
1048f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return result < 0;
1049b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1050f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return one.compareCodePointOrder(other) < 0;
1051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1053f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
1054b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1055b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1056b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Constructor & Destructor for AlphabeticIndex::Record
1057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1058b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//     Records are internal only, instances are not directly surfaced in the public API.
1059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//     This class is mostly struct-like, with all public fields.
1060b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1061f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1062f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : name_(name), data_(data) {}
1063f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
1064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Record::~Record() {
1065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1066b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1067b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1068b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1069b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1070b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
1071b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1072f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (inputList_ == NULL) {
1073f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_ = new UVector(status);
1074f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (inputList_ == NULL) {
1075f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
1076f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return *this;
1077f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
1078f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_->setDeleter(alphaIndex_deleteRecord);
1079f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1080f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Record *r = new Record(name, data);
1081f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (r == NULL) {
1082f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
1083f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return *this;
1084f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1085f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inputList_->addElement(r, status);
1086f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
1087b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::string ss;
1088b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::string ss2;
1089b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1090b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1091b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1092b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1093b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1094b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1095b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1096f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1097f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_->removeAllElements();
1098f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        clearBuckets();
1099b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
1105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
1107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex() const {
1113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return labelsIterIndex_;
1114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ == NULL && currentBucket_ != NULL) {
1122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ENUM_OUT_OF_SYNC_ERROR;
1123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
1126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ++labelsIterIndex_;
1130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        labelsIterIndex_ = buckets_->getBucketCount();
1132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1134f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    resetRecordIterator();
1136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return TRUE;
1137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getBucketLabel() const {
1140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ != NULL) {
1141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->label_;
1142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return emptyString_;
1144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ != NULL) {
1150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->labelType_;
1151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return U_ALPHAINDEX_NORMAL;
1153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketRecordCount() const {
1158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->records_->size();
1160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
1162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
1169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    internalResetBucketIterator();
1171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ == NULL) {
1180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // We are trying to iterate over the items in a bucket, but there is no
1181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // current bucket from the enumeration of buckets.
1182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_INVALID_STATE_ERROR;
1183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ == NULL) {
1186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ENUM_OUT_OF_SYNC_ERROR;
1187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_->records_ == NULL) {
1190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return FALSE;
1191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ++itemsIterIndex_;
1193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_  = currentBucket_->records_->size();
1195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return TRUE;
1198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getRecordName() const {
1202f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *retStr = &emptyString_;
1203f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ >= 0 &&
1205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ < currentBucket_->records_->size()) {
1206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            retStr = &item->name_;
1208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *retStr;
1210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst void *AlphabeticIndex::getRecordData() const {
1213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const void *retPtr = NULL;
1214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ >= 0 &&
1216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ < currentBucket_->records_->size()) {
1217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            retPtr = item->data_;
1219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return retPtr;
1221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    itemsIterIndex_ = -1;
1226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                const UnicodeString &lowerBoundary,
1233f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                UAlphabeticIndexLabelType type)
1234f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1235f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          displayBucket_(NULL), displayIndex_(-1),
1236f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          records_(NULL) {
1237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::~Bucket() {
1241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete records_;
1242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
1245103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
1246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
1247