1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2009-2014, International Business Machines Corporation and
4f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* others. All Rights Reserved.
5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
11103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/alphaindex.h"
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/coll.h"
14f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/localpointer.h"
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/normalizer2.h"
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/tblcoll.h"
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uchar.h"
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ulocdata.h"
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uniset.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h"
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/usetiter.h"
22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
24f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "cmemory.h"
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cstring.h"
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h"
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uvector.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h"
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//#include <string>
31103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//#include <iostream>
32f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
33f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
34f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
37f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
39f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
40f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Prefix string for Chinese index buckets.
41f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
42f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
43f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UChar BASE[1] = { 0xFDD0 };
44f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst int32_t BASE_LENGTH = 1;
45f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
46f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
47f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                const UnicodeString &one, const UnicodeString &other);
48f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
49f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
52f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right);
53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right);
56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  UVector<Record *> support function, delete a Record.
58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void U_CALLCONV
59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoalphaIndex_deleteRecord(void *obj) {
60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete static_cast<AlphabeticIndex::Record *>(obj);
61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
63f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
65f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
66f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                           UErrorCode &errorCode) {
67f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
68f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (owned.isValid()) {
69f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return owned.orphan();
70f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
71f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString *p = new UnicodeString(s);
72f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (p == NULL) {
73f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
74f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
75f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return p;
76f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
78f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline UnicodeString *getString(const UVector &list, int32_t i) {
79f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<UnicodeString *>(list[i]);
80f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
82f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
83f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<AlphabeticIndex::Bucket *>(list[i]);
84f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
86f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
87f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return static_cast<AlphabeticIndex::Record *>(list[i]);
88f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
89f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
90f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
91f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Like Java Collections.binarySearch(List, String, Comparator).
92f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *
93f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * @return the index>=0 where the item was found,
94f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *         or the index<0 for inserting the string at ~index in sorted order
95f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
96f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
97f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (list.size() == 0) { return ~0; }
98f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t start = 0;
99f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t limit = list.size();
100f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (;;) {
101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t i = (start + limit) / 2;
102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UErrorCode errorCode = U_ZERO_ERROR;
104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UCollationResult cmp = coll.compare(s, *si, errorCode);
105f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (cmp == UCOL_EQUAL) {
106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return i;
107f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (cmp < 0) {
108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (i == start) {
109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return ~start;  // insert s before *si
110f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            limit = i;
112f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (i == start) {
114f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return ~(start + 1);  // insert s after *si
115f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
116f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            start = i;
117f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
123f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// The BucketList is not in the anonymous namespace because only Clang
124f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// seems to support its use in other classes from there.
125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// However, we also don't need U_I18N_API because it is not used from outside the i18n library.
126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusclass BucketList : public UObject {
127f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliuspublic:
128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    BucketList(UVector *bucketList, UVector *publicBucketList)
129f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t displayIndex = 0;
131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < publicBucketList->size(); ++i) {
132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
133f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
134f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // The virtual destructor must not be inline.
1378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // See ticket #8454 for details.
1388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    virtual ~BucketList();
139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
140f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t getBucketCount() const {
141f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return immutableVisibleList_->size();
142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
144f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
145f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                           UErrorCode &errorCode) {
146f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // binary search
147f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t start = 0;
148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t limit = bucketList_->size();
149f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while ((start + 1) < limit) {
150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t i = (start + limit) / 2;
151f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
152f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UCollationResult nameVsBucket =
153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
154f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (nameVsBucket < 0) {
155f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                limit = i;
156f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            } else {
157f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                start = i;
158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
159f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
160f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket = bucket->displayBucket_;
163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
164f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bucket->displayIndex_;
165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
166f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
167f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    /** All of the buckets, visible and invisible. */
168f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector *bucketList_;
169f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    /** Just the visible buckets. */
170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector *immutableVisibleList_;
171f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius};
172f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusBucketList::~BucketList() {
1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    delete bucketList_;
1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if (immutableVisibleList_ != bucketList_) {
1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        delete immutableVisibleList_;
1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
1788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
1798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete buckets_;
182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete collatorPrimaryOnly_;
183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t
186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketCount() const {
187f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketCount();
188f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t
191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketIndex(
192f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString &name, UErrorCode &errorCode) const {
193f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst AlphabeticIndex::Bucket *
197f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
198f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (0 <= index && index < buckets_->getBucketCount()) {
199f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return icu::getBucket(*buckets_->immutableVisibleList_, index);
200f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
201f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
205f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
206f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : inputList_(NULL),
207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          maxLabelCount_(99),
209f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          initialLabels_(NULL), firstCharsInScripts_(NULL),
210f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          collator_(NULL), collatorPrimaryOnly_(NULL),
211f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          buckets_(NULL) {
212f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    init(&locale, status);
213f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
215f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
216f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
217f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : inputList_(NULL),
218f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          maxLabelCount_(99),
220f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          initialLabels_(NULL), firstCharsInScripts_(NULL),
221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          collator_(collator), collatorPrimaryOnly_(NULL),
222f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          buckets_(NULL) {
223f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    init(NULL, status);
224f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
225f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
226f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::~AlphabeticIndex() {
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete collator_;
230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete collatorPrimaryOnly_;
231f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete firstCharsInScripts_;
232f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete buckets_;
233f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    delete inputList_;
234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete initialLabels_;
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    initialLabels_->addAll(additions);
243f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    addIndexExemplars(locale, status);
250f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
255f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
256f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
257f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // In C++, the ImmutableIndex must own its copy of the BucketList,
258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // even if it contains no records, for proper memory management.
259f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We could clone the buckets_ if they are not NULL,
260f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // but that would be worth it only if this method is called multiple times,
261f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // or called after using the old-style bucket iterator API.
262f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
263f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<RuleBasedCollator> coll(
264f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
265f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (immutableBucketList.isNull() || coll.isNull()) {
266f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
267f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
268f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
269f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (immIndex == NULL) {
271f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
272f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
273f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
274f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // The ImmutableIndex adopted its parameter objects.
275f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    immutableBucketList.orphan();
276f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    coll.orphan();
277f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return immIndex;
278f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
279f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
281f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
285f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketCount();
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
290f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status) || inputList_ == NULL) {
291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
293f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return inputList_->size();
294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
296f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
297f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
298f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return; }
299f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
300f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
301f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString &overflowBoundary =
302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
304f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We make a sorted array of elements.
305f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Some of the input may be redundant.
306f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
307f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // We filter out those cases.
308f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSetIterator iter(*initialLabels_);
309f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (iter.next()) {
310f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString *item = &iter.getString();
311f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        LocalPointer<UnicodeString> ownedItem;
312f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UBool checkDistinct;
313f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t itemLength = item->length();
314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (!item->hasMoreChar32Than(0, itemLength, 1)) {
315f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = FALSE;
316f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
317f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                item->charAt(itemLength - 2) != 0x2a) {
318f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Use a label if it is marked with one trailing star,
319f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // even if the label string sorts the same when all contractions are suppressed.
320f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
321f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            item = ownedItem.getAlias();
322f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (item == NULL) {
323f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
324f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return;
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
326f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = FALSE;
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
328f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            checkDistinct = TRUE;
329f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
330f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
331f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Ignore a primary-ignorable or non-alphabetic index character.
332f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Ignore an index character that will land in the overflow bucket.
334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (checkDistinct &&
335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // Ignore a multi-code point index character that does not sort distinctly
337f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // from the sequence of its separate characters.
338f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
339f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
340f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (insertionPoint < 0) {
341f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                indexCharacters.insertElementAt(
342f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
343f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            } else {
344f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
345f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
346f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    indexCharacters.setElementAt(
347f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        ownedString(*item, ownedItem, errorCode), insertionPoint);
348f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
349f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
352f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return; }
353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
354f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // if the result is still too large, cut down to maxCount elements, by removing every nth element
355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
356f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t size = indexCharacters.size() - 1;
357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (size > maxLabelCount_) {
358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t count = 0;
359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t old = -1;
360f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < indexCharacters.size();) {
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            ++count;
362f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            int32_t bump = count * maxLabelCount_ / size;
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if (bump == old) {
364f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                indexCharacters.removeElementAt(i);
365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                old = bump;
367f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                ++i;
368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
371f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
373f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
374f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
375f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
376f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (!current.startsWith(BASE, BASE_LENGTH)) {
377f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return current;
378f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
379f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UChar rest = current.charAt(BASE_LENGTH);
380f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (0x2800 < rest && rest <= 0x28FF) { // stroke count
381f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        int32_t count = rest-0x2800;
382f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        temp.setTo((UChar)(0x30 + count % 10));
383f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (count >= 10) {
384f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            count /= 10;
385f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            temp.insert(0, (UChar)(0x30 + count % 10));
386f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (count >= 10) {
387f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                count /= 10;
388f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                temp.insert(0, (UChar)(0x30 + count));
389f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
390f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
391f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return temp.append((UChar)0x5283);
392f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
393f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return temp.setTo(current, BASE_LENGTH);
394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
396f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool hasMultiplePrimaryWeights(
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const RuleBasedCollator &coll, uint32_t variableTop,
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ces.removeAllElements();
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll.internalGetCEs(s, ces, errorCode);
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(errorCode)) { return FALSE; }
402f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool seenPrimary = FALSE;
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for (int32_t i = 0; i < ces.size(); ++i) {
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int64_t ce = ces.elementAti(i);
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t p = (uint32_t)(ce >> 32);
406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (p > variableTop) {
407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // not primary ignorable
408f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (seenPrimary) {
409f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return TRUE;
410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
411f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            seenPrimary = TRUE;
412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
413f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
414f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return FALSE;
415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
417f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
419f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusBucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
420f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Initialize indexCharacters.
421f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UVector indexCharacters(errorCode);
422f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    indexCharacters.setDeleter(uprv_deleteUObject);
423f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initLabels(indexCharacters, errorCode);
424f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
425f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
426f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Variables for hasMultiplePrimaryWeights().
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector64 ces(errorCode);
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t variableTop;
429f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
431f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
432f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        variableTop = 0;
433f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
434f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool hasInvisibleBuckets = FALSE;
435f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
436f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Helper arrays for Chinese Pinyin collation.
437f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *asciiBuckets[26] = {
438f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
439f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
440f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    };
441f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *pinyinBuckets[26] = {
442f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
443f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
444f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    };
445f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UBool hasPinyin = FALSE;
446f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
447f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<UVector> bucketList(new UVector(errorCode));
448f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketList.isNull()) {
449f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
450f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
451f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
452f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->setDeleter(uprv_deleteUObject);
453f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
454f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // underflow bucket
455f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
456f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucket == NULL) {
457f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
458f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
459f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
460f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->addElement(bucket, errorCode);
461f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
462f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
463f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString temp;
464f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
465f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // fix up the list, adding underflow, additions, overflow
466f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Insert inflow labels as needed.
467f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t scriptIndex = -1;
468f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *scriptUpperBoundary = &emptyString_;
469f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < indexCharacters.size(); ++i) {
470f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeString &current = *getString(indexCharacters, i);
471f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
472f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // We crossed the script boundary into a new script.
473f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            const UnicodeString &inflowBoundary = *scriptUpperBoundary;
474f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UBool skippedScript = FALSE;
475f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            for (;;) {
476f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
477f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
478f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
479f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
480f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                skippedScript = TRUE;
481f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
482f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (skippedScript && bucketList->size() > 1) {
483f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                // We are skipping one or more scripts,
484f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                // and we are not just getting out of the underflow label.
485f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
486f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (bucket == NULL) {
487f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    errorCode = U_MEMORY_ALLOCATION_ERROR;
488f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    return NULL;
489f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
490f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucketList->addElement(bucket, errorCode);
491f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
492f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
493f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Add a bucket with the current label.
494f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
495f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket == NULL) {
496f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
497f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
498f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
499f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList->addElement(bucket, errorCode);
500f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Remember ASCII and Pinyin buckets for Pinyin redirects.
501f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UChar c;
502f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
503f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            asciiBuckets[c - 0x41] = bucket;
504f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
505f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
506f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            pinyinBuckets[c - 0x41] = bucket;
507f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            hasPinyin = TRUE;
508f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
509f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Check for multiple primary weights.
510f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (!current.startsWith(BASE, BASE_LENGTH) &&
511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          ces, errorCode) &&
513f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
514f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // "AE-ligature" or "Sch" etc.
515f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            for (int32_t i = bucketList->size() - 2;; --i) {
516f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                Bucket *singleBucket = getBucket(*bucketList, i);
517f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
518f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // There is no single-character bucket since the last
519f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // underflow or inflow label.
520f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
521f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
522f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                if (singleBucket->displayBucket_ == NULL &&
523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                                   singleBucket->lowerBoundary_,
525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                                   ces, errorCode)) {
526f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // Add an invisible bucket that redirects strings greater than the expansion
527f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // to the previous single-character bucket.
528f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // For example, after ... Q R S Sch we add Sch\uFFFF->S
529f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
530f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucket = new Bucket(emptyString_,
531f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        UnicodeString(current).append((UChar)0xFFFF),
532f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        U_ALPHAINDEX_NORMAL);
533f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    if (bucket == NULL) {
534f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        errorCode = U_MEMORY_ALLOCATION_ERROR;
535f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                        return NULL;
536f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    }
537f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucket->displayBucket_ = singleBucket;
538f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    bucketList->addElement(bucket, errorCode);
539f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    hasInvisibleBuckets = TRUE;
540f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                    break;
541f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                }
542f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
543f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
544f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
545f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
546f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketList->size() == 1) {
547f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // No real labels, show only the underflow label.
548f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
549f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bl == NULL) {
550f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
551f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
552f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
553f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList.orphan();
554f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bl;
555f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
556f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // overflow bucket
557f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
558f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucket == NULL) {
559f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
560f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
561f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
562f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList->addElement(bucket, errorCode); // final
563f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
564f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (hasPinyin) {
565f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // Redirect Pinyin buckets.
566f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Bucket *asciiBucket = NULL;
567f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        for (int32_t i = 0; i < 26; ++i) {
568f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (asciiBuckets[i] != NULL) {
569f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                asciiBucket = asciiBuckets[i];
570f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
571f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
572f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                pinyinBuckets[i]->displayBucket_ = asciiBucket;
573f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                hasInvisibleBuckets = TRUE;
574f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
575f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
576f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
577f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
578f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
579f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (!hasInvisibleBuckets) {
580f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
581f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bl == NULL) {
582f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
583f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return NULL;
584f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
585f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucketList.orphan();
586f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return bl;
587f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
588f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Merge inflow buckets that are visually adjacent.
589f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Iterate backwards: Merge inflow into overflow rather than the other way around.
590f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t i = bucketList->size() - 1;
591f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *nextBucket = getBucket(*bucketList, i);
592f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (--i > 0) {
593f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = getBucket(*bucketList, i);
594f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
595f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            continue;  // skip invisible buckets
596f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
597f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
598f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
599f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                bucket->displayBucket_ = nextBucket;
600f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                continue;
601f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
602f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
603f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = bucket;
604f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
605f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
606f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    LocalPointer<UVector> publicBucketList(new UVector(errorCode));
607f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketList.isNull()) {
608f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
609f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
610f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
611f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Do not call publicBucketList->setDeleter():
612f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This vector shares its objects with the bucketList.
613f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < bucketList->size(); ++i) {
614f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket = getBucket(*bucketList, i);
615f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ == NULL) {
616f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            publicBucketList->addElement(bucket, errorCode);
617f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
618f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
619f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode)) { return NULL; }
620f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
621f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bl == NULL) {
622f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
623f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return NULL;
624f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
625f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    bucketList.orphan();
626f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    publicBucketList.orphan();
627f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return bl;
628f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
629f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
630f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/**
631f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Creates an index, and buckets and sorts the list of records into the index.
632f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */
633f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
634f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode) || buckets_ != NULL) {
635f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return;
636f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
637f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    buckets_ = createBucketList(errorCode);
638f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
639b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
640b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
641b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
642f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Sort the records by name.
643f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Stable sort preserves input order of collation duplicates.
644f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
645f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
646f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Now, we traverse all of the input, which is now sorted.
647f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // If the item doesn't go in the current bucket, we find the next bucket that contains it.
648f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This makes the process order n*log(n), since we just sort the list and then do a linear process.
649f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
650f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // we need to improve it for that case.
651f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
652f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
653f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t bucketIndex = 1;
654f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Bucket *nextBucket;
655f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *upperBoundary;
656f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (bucketIndex < buckets_->bucketList_->size()) {
657f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
658f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        upperBoundary = &nextBucket->lowerBoundary_;
659f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    } else {
660f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        nextBucket = NULL;
661f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        upperBoundary = NULL;
662f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
663f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (int32_t i = 0; i < inputList_->size(); ++i) {
664f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Record *r = getRecord(*inputList_, i);
665f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // if the current bucket isn't the right one, find the one that is
666f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // We have a special flag for the last bucket so that we don't look any further
667f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while (upperBoundary != NULL &&
668f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
669f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            currentBucket = nextBucket;
670f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // now reset the boundary that we compare against
671f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (bucketIndex < buckets_->bucketList_->size()) {
672f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
673f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                upperBoundary = &nextBucket->lowerBoundary_;
674b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
675f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                upperBoundary = NULL;
676f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
677f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
678f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // now put the record into the bucket.
679f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        Bucket *bucket = currentBucket;
680f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->displayBucket_ != NULL) {
681f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket = bucket->displayBucket_;
682f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
683f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (bucket->records_ == NULL) {
684f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            bucket->records_ = new UVector(errorCode);
685f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if (bucket->records_ == NULL) {
686f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
687f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                return;
688b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
690f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        bucket->records_->addElement(r, errorCode);
691b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
692f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
693b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
694f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::clearBuckets() {
695f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ != NULL) {
696f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        delete buckets_;
697f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        buckets_ = NULL;
698f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        internalResetBucketIterator();
699f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
700f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
701f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
702f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::internalResetBucketIterator() {
703f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    labelsIterIndex_ = -1;
704f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    currentBucket_ = NULL;
705b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
707b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
709fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
710b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
712b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
713b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
714b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeSet exemplars;
715b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
716b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_SUCCESS(status)) {
717f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        initialLabels_->addAll(exemplars);
718b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
720b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
721b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
722f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // The locale data did not include explicit Index characters.
723b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Synthesize a set of them from the locale's standard exemplar characters.
724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
726b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
727b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
728b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
729f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // question: should we add auxiliary exemplars?
730f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
731f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        exemplars.add(0x61, 0x7A);
732f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
733f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
734f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // cut down to small list
735f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        exemplars.remove(0xAC00, 0xD7A3).
736f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
737f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
738f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
739f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            add(0xD30C).add(0xD558);
740f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
741f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
742f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // cut down to small list
743f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // make use of the fact that Ethiopic is allocated in 8's, where
744f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        // the base is 0 mod 8.
745f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeSet ethiopic(
746f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
747f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        UnicodeSetIterator it(ethiopic);
748f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        while (it.next() && !it.isString()) {
749f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            if ((it.getCodepoint() & 0x7) != 0) {
750f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                exemplars.remove(it.getCodepoint());
751f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            }
752f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
753f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
754f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Upper-case any that aren't already so.
756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //   (We only do this for synthesized index characters.)
757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeSetIterator it(exemplars);
758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString upperC;
759b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while (it.next()) {
760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UnicodeString &exemplarC = it.getString();
761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        upperC = exemplarC;
762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        upperC.toUpper(locale);
763f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        initialLabels_->add(upperC);
764b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
765f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}
766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
767f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
768f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSet contractions;
769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
770fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    initialLabels_->addAll(contractions);
772f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeSetIterator iter(contractions);
773f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    while (iter.next()) {
774f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        const UnicodeString &s = iter.getString();
775fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
776fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar c = s.charAt(s.length() - 1);
777fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (0x41 <= c && c <= 0x5A) {  // A-Z
778fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // There are Pinyin labels, add ASCII A-Z labels as well.
779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            initialLabels_->add(0x41, 0x5A);  // A-Z
780fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
782b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
783fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
785b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
786b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
787b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
788b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
789b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
790f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar CGJ = 0x034F;
791b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
792b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UnicodeString result;
793b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (item.length() == 0) {
794b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return result;
795b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
796b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t i = 0;
797b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for (;;) {
798b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UChar32  cp = item.char32At(i);
799b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.append(cp);
800b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        i = item.moveIndex32(i, 1);
801b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (i >= item.length()) {
802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
803b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
804b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.append(CGJ);
805b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
806b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return result;
807b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
809b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
810b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
811b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return FALSE;
812b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
814b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
815b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
816b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return FALSE;
817b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
819b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst RuleBasedCollator &AlphabeticIndex::getCollator() const {
821fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return *collator_;
822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
823b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
824b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
825b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getInflowLabel() const {
826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return inflowLabel_;
827b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
829b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getOverflowLabel() const {
830b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return overflowLabel_;
831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
832b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
833b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return underflowLabel_;
836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
837b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
839b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inflowLabel_ = label;
841f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
844b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
846b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
847b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    overflowLabel_ = label;
848f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
849b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
851b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    underflowLabel_ = label;
855f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getMaxLabelCount() const {
861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return maxLabelCount_;
862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (maxLabelCount <= 0) {
870b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    maxLabelCount_ = maxLabelCount;
874f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
879b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
880b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  init() - Common code for constructors.
881b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
882b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
883f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
884f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return; }
885f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (locale == NULL && collator_ == NULL) {
886f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_ILLEGAL_ARGUMENT_ERROR;
887b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
889f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
890b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    initialLabels_         = new UnicodeSet();
891f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (initialLabels_ == NULL) {
892f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
893f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return;
894f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
896f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
897b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    overflowLabel_ = inflowLabel_;
898b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    underflowLabel_ = inflowLabel_;
899b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
900f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collator_ == NULL) {
901fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        Collator *coll = Collator::createInstance(*locale, status);
902fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (U_FAILURE(status)) {
903fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            delete coll;
904fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
905fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
906fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (coll == NULL) {
907f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
908f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return;
909f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
910fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        collator_ = dynamic_cast<RuleBasedCollator *>(coll);
911fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (collator_ == NULL) {
912fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            delete coll;
913fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            status = U_UNSUPPORTED_ERROR;
914fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
915fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
916f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
917f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
918f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (collatorPrimaryOnly_ == NULL) {
919f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
922f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
923f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    firstCharsInScripts_ = firstStringsInScript(status);
924f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return; }
925f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
926f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // Guard against a degenerate collator where
927f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // some script boundary strings are primary ignorable.
928f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    for (;;) {
929f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (U_FAILURE(status)) { return; }
930f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (firstCharsInScripts_->isEmpty()) {
931f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            // AlphabeticIndex requires some non-ignorable script boundary strings.
932f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_ILLEGAL_ARGUMENT_ERROR;
933f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return;
934b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
935f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (collatorPrimaryOnly_->compare(
936f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
937f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                emptyString_, status) == UCOL_EQUAL) {
938f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            firstCharsInScripts_->removeElementAt(0);
939f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        } else {
940f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            break;
941b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
942fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
943fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
944fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Chinese index characters, which are specific to each of the several Chinese tailorings,
945fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // take precedence over the single locale data exemplar set per language.
946fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (!addChineseIndexCharacters(status) && locale != NULL) {
947fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addIndexExemplars(*locale, status);
948b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
950b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
951b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
952b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
953b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Comparison function for UVector<UnicodeString *> sorting with a collator.
954b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
955b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
956f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right) {
957103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *leftElement = static_cast<const UElement *>(left);
958103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *rightElement = static_cast<const UElement *>(right);
959103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
960103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
961b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
962b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (leftString == rightString) {
963b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Catches case where both are NULL
964b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
965b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
966b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (leftString == NULL) {
967b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 1;
968b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
969b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (rightString == NULL) {
970b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return -1;
971b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
972f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const Collator *col = static_cast<const Collator *>(context);
973f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode errorCode = U_ZERO_ERROR;
974f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return col->compare(*leftString, *rightString, errorCode);
975b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
977b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
978b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Comparison function for UVector<Record *> sorting with a collator.
979b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
980b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
981b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right) {
982103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *leftElement = static_cast<const UElement *>(left);
983103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const UElement *rightElement = static_cast<const UElement *>(right);
984103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
985103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const Collator *col = static_cast<const Collator *>(context);
987f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode errorCode = U_ZERO_ERROR;
988f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return col->compare(leftRec->name_, rightRec->name_, errorCode);
989b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
990b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
991b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
992b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
993b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
994b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
995fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LocalPointer<UVector> dest(new UVector(status));
996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (dest.isNull()) {
997f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
998b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return NULL;
999b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1000103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    dest->setDeleter(uprv_deleteUObject);
1001fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Fetch the script-first-primary contractions which are defined in the root collator.
1002fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // They all start with U+FDD1.
1003fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet set;
1004fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1005fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(status)) {
1006fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULL;
1007fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1008fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (set.isEmpty()) {
1009fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        status = U_UNSUPPORTED_ERROR;
1010fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULL;
1011fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1012fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSetIterator iter(set);
1013fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while (iter.next()) {
1014fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UnicodeString &boundary = iter.getString();
1015fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1016fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1017fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Ignore boundaries for the special reordering groups.
1018fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Take only those for "real scripts" (where the sample character is a Letter,
1019fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // and the one for unassigned implicit weights (Cn).
1020fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            continue;
1021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
1022fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UnicodeString *s = new UnicodeString(boundary);
1023fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (s == NULL) {
1024b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            status = U_MEMORY_ALLOCATION_ERROR;
1025fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return NULL;
1026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
1027fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dest->addElement(s, status);
1028fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1029fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return dest.orphan();
1030b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1031b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1032b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1033f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace {
1034b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1035b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
1036f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Returns true if one index character string is "better" than the other.
1037f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1038f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * better, and otherwise binary-less-than is better.
1039b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
1040f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1041f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                const UnicodeString &one, const UnicodeString &other) {
1042f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    // This is called with primary-equal strings, but never with one.equals(other).
1043f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UErrorCode status = U_ZERO_ERROR;
1044f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1045f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1046f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_FAILURE(status)) { return FALSE; }
1047f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    int32_t result = n1.countChar32() - n2.countChar32();
1048b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (result != 0) {
1049f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return result < 0;
1050b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    result = n1.compareCodePointOrder(n2);
1052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (result != 0) {
1053f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return result < 0;
1054b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1055f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return one.compareCodePointOrder(other) < 0;
1056b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1058f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}  // namespace
1059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1060b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1061b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//  Constructor & Destructor for AlphabeticIndex::Record
1062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//
1063b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//     Records are internal only, instances are not directly surfaced in the public API.
1064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//     This class is mostly struct-like, with all public fields.
1065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1066f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1067f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : name_(name), data_(data) {}
1068f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius
1069b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Record::~Record() {
1070b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1071b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1072b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1073b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1074b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1075b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
1076b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1077f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (inputList_ == NULL) {
1078f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_ = new UVector(status);
1079f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        if (inputList_ == NULL) {
1080f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            status = U_MEMORY_ALLOCATION_ERROR;
1081f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius            return *this;
1082f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        }
1083f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_->setDeleter(alphaIndex_deleteRecord);
1084f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1085f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    Record *r = new Record(name, data);
1086f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (r == NULL) {
1087f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        status = U_MEMORY_ALLOCATION_ERROR;
1088f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return *this;
1089f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1090f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    inputList_->addElement(r, status);
1091f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    clearBuckets();
1092b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::string ss;
1093b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::string ss2;
1094b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1095b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1096b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1097b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1098b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1099b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        inputList_->removeAllElements();
1103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        clearBuckets();
1104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
1110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
1112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex() const {
1118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return labelsIterIndex_;
1119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ == NULL && currentBucket_ != NULL) {
1127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ENUM_OUT_OF_SYNC_ERROR;
1128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    initBuckets(status);
1131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ++labelsIterIndex_;
1135f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1136f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        labelsIterIndex_ = buckets_->getBucketCount();
1137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    resetRecordIterator();
1141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return TRUE;
1142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getBucketLabel() const {
1145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ != NULL) {
1146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->label_;
1147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return emptyString_;
1149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ != NULL) {
1155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->labelType_;
1156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return U_ALPHAINDEX_NORMAL;
1158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketRecordCount() const {
1163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return currentBucket_->records_->size();
1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
1166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
1167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
1174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1175f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    internalResetBucketIterator();
1176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (U_FAILURE(status)) {
1182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (currentBucket_ == NULL) {
1185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // We are trying to iterate over the items in a bucket, but there is no
1186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // current bucket from the enumeration of buckets.
1187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_INVALID_STATE_ERROR;
1188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (buckets_ == NULL) {
1191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        status = U_ENUM_OUT_OF_SYNC_ERROR;
1192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_->records_ == NULL) {
1195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        return FALSE;
1196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    }
1197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ++itemsIterIndex_;
1198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_  = currentBucket_->records_->size();
1200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
1201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return TRUE;
1203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getRecordName() const {
1207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    const UnicodeString *retStr = &emptyString_;
1208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ >= 0 &&
1210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ < currentBucket_->records_->size()) {
1211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            retStr = &item->name_;
1213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *retStr;
1215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst void *AlphabeticIndex::getRecordData() const {
1218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const void *retPtr = NULL;
1219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ >= 0 &&
1221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        itemsIterIndex_ < currentBucket_->records_->size()) {
1222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            retPtr = item->data_;
1224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
1225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return retPtr;
1226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    itemsIterIndex_ = -1;
1231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
1232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                const UnicodeString &lowerBoundary,
1238f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius                                UAlphabeticIndexLabelType type)
1239f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius        : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1240f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          displayBucket_(NULL), displayIndex_(-1),
1241f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius          records_(NULL) {
1242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::~Bucket() {
1246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete records_;
1247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
1248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
1249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
1250103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
1251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
1252