1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2009-2014, International Business Machines Corporation and 4f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 11103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/alphaindex.h" 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/coll.h" 14f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/localpointer.h" 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/normalizer2.h" 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/tblcoll.h" 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uchar.h" 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ulocdata.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uniset.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h" 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/usetiter.h" 22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 24f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "cmemory.h" 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cstring.h" 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h" 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uvector.h" 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h" 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//#include <string> 31103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//#include <iostream> 32f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 35f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 37f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 38f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Prefix string for Chinese index buckets. 39f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes 40f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 41f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UChar BASE[1] = { 0xFDD0 }; 42f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst int32_t BASE_LENGTH = 1; 43f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 44f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, 45f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &one, const UnicodeString &other); 46f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 47f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 50f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right); 51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right); 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// UVector<Record *> support function, delete a Record. 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void U_CALLCONV 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoalphaIndex_deleteRecord(void *obj) { 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete static_cast<AlphabeticIndex::Record *>(obj); 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 61f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 63f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned, 64f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode &errorCode) { 65f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 66f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (owned.isValid()) { 67f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return owned.orphan(); 68f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 69f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString *p = new UnicodeString(s); 70f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (p == NULL) { 71f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 72f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 73f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return p; 74f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 76f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline UnicodeString *getString(const UVector &list, int32_t i) { 77f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<UnicodeString *>(list[i]); 78f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 80f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) { 81f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<AlphabeticIndex::Bucket *>(list[i]); 82f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 84f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) { 85f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<AlphabeticIndex::Record *>(list[i]); 86f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 87f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 88f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 89f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Like Java Collections.binarySearch(List, String, Comparator). 90f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 91f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * @return the index>=0 where the item was found, 92f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * or the index<0 for inserting the string at ~index in sorted order 93f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 94f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) { 95f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (list.size() == 0) { return ~0; } 96f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t start = 0; 97f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t limit = list.size(); 98f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 99f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = (start + limit) / 2; 100f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i)); 101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UCollationResult cmp = coll.compare(s, *si, errorCode); 103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (cmp == UCOL_EQUAL) { 104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return i; 105f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (cmp < 0) { 106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (i == start) { 107f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return ~start; // insert s before *si 108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius limit = i; 110f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (i == start) { 112f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return ~(start + 1); // insert s after *si 113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 114f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius start = i; 115f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 119f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 120f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// The BucketList is not in the anonymous namespace because only Clang 122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// seems to support its use in other classes from there. 123f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// However, we also don't need U_I18N_API because it is not used from outside the i18n library. 124f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusclass BucketList : public UObject { 125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliuspublic: 126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList(UVector *bucketList, UVector *publicBucketList) 127f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : bucketList_(bucketList), immutableVisibleList_(publicBucketList) { 128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t displayIndex = 0; 129f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < publicBucketList->size(); ++i) { 130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++; 131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // The virtual destructor must not be inline. 1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // See ticket #8454 for details. 1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius virtual ~BucketList(); 137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 138f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t getBucketCount() const { 139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return immutableVisibleList_->size(); 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 141f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 142f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly, 143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode &errorCode) { 144f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // binary search 145f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t start = 0; 146f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t limit = bucketList_->size(); 147f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while ((start + 1) < limit) { 148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = (start + limit) / 2; 149f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i); 150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UCollationResult nameVsBucket = 151f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode); 152f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (nameVsBucket < 0) { 153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius limit = i; 154f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 155f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius start = i; 156f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 157f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start); 159f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 160f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = bucket->displayBucket_; 161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bucket->displayIndex_; 163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 164f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 165f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius /** All of the buckets, visible and invisible. */ 166f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector *bucketList_; 167f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius /** Just the visible buckets. */ 168f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector *immutableVisibleList_; 169f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}; 170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 1718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusBucketList::~BucketList() { 1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete bucketList_; 1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (immutableVisibleList_ != bucketList_) { 1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete immutableVisibleList_; 1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 178f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::~ImmutableIndex() { 179f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete collatorPrimaryOnly_; 181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t 184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketCount() const { 185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketCount(); 186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 187f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 188f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t 189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketIndex( 190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &name, UErrorCode &errorCode) const { 191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode); 192f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 193f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst AlphabeticIndex::Bucket * 195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const { 196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0 <= index && index < buckets_->getBucketCount()) { 197f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return icu::getBucket(*buckets_->immutableVisibleList_, index); 198f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 199f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 203f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status) 204f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : inputList_(NULL), 205f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL), 206f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius maxLabelCount_(99), 207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_(NULL), firstCharsInScripts_(NULL), 208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_(NULL), collatorPrimaryOnly_(NULL), 209f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_(NULL) { 210f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius init(&locale, status); 211f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 212f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 213f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status) 215f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : inputList_(NULL), 216f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL), 217f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius maxLabelCount_(99), 218f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_(NULL), firstCharsInScripts_(NULL), 219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_(collator), collatorPrimaryOnly_(NULL), 220f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_(NULL) { 221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius init(NULL, status); 222f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 223f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 224f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::~AlphabeticIndex() { 227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete collator_; 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete collatorPrimaryOnly_; 229f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete firstCharsInScripts_; 230f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 231f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete inputList_; 232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete initialLabels_; 233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) { 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho initialLabels_->addAll(additions); 241f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) { 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addIndexExemplars(locale, status); 248f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 253f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) { 254f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 255f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // In C++, the ImmutableIndex must own its copy of the BucketList, 256f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // even if it contains no records, for proper memory management. 257f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We could clone the buckets_ if they are not NULL, 258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // but that would be worth it only if this method is called multiple times, 259f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // or called after using the old-style bucket iterator API. 260f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode)); 261f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<RuleBasedCollator> coll( 262f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone())); 263f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immutableBucketList.isNull() || coll.isNull()) { 264f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 265f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 266f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 267f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias()); 268f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immIndex == NULL) { 269f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 271f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 272f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The ImmutableIndex adopted its parameter objects. 273f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius immutableBucketList.orphan(); 274f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius coll.orphan(); 275f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return immIndex; 276f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 277f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketCount(UErrorCode &status) { 279f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 283f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketCount(); 284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getRecordCount(UErrorCode &status) { 288f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status) || inputList_ == NULL) { 289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 291f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return inputList_->size(); 292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 294f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { 295f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); 296f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return; } 297f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 298f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0); 299f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &overflowBoundary = 300f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1); 301f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We make a sorted array of elements. 303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Some of the input may be redundant. 304f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". 305f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We filter out those cases. 306f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator iter(*initialLabels_); 307f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (iter.next()) { 308f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *item = &iter.getString(); 309f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UnicodeString> ownedItem; 310f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool checkDistinct; 311f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t itemLength = item->length(); 312f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!item->hasMoreChar32Than(0, itemLength, 1)) { 313f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = FALSE; 314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if(item->charAt(itemLength - 1) == 0x2a && // '*' 315f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius item->charAt(itemLength - 2) != 0x2a) { 316f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Use a label if it is marked with one trailing star, 317f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // even if the label string sorts the same when all contractions are suppressed. 318f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1)); 319f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius item = ownedItem.getAlias(); 320f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (item == NULL) { 321f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 322f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 324f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = FALSE; 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 326f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = TRUE; 327f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 328f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) { 329f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore a primary-ignorable or non-alphabetic index character. 330f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) { 331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Ignore an index character that will land in the overflow bucket. 332f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (checkDistinct && 333f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) { 334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore a multi-code point index character that does not sort distinctly 335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // from the sequence of its separate characters. 336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 337f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_); 338f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (insertionPoint < 0) { 339f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.insertElementAt( 340f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode); 341f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 342f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint); 343f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) { 344f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.setElementAt( 345f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedString(*item, ownedItem, errorCode), insertionPoint); 346f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 347f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 350f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return; } 351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 352f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element 353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 354f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t size = indexCharacters.size() - 1; 355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (size > maxLabelCount_) { 356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t count = 0; 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t old = -1; 358f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < indexCharacters.size();) { 359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++count; 360f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t bump = count * maxLabelCount_ / size; 361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (bump == old) { 362f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.removeElementAt(i); 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho old = bump; 365f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ++i; 366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 369f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 371f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 372f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 373f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UnicodeString &fixLabel(const UnicodeString ¤t, UnicodeString &temp) { 374f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!current.startsWith(BASE, BASE_LENGTH)) { 375f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return current; 376f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 377f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar rest = current.charAt(BASE_LENGTH); 378f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0x2800 < rest && rest <= 0x28FF) { // stroke count 379f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t count = rest-0x2800; 380f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.setTo((UChar)(0x30 + count % 10)); 381f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (count >= 10) { 382f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius count /= 10; 383f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.insert(0, (UChar)(0x30 + count % 10)); 384f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (count >= 10) { 385f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius count /= 10; 386f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.insert(0, (UChar)(0x30 + count)); 387f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 388f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 389f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return temp.append((UChar)0x5283); 390f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 391f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return temp.setTo(current, BASE_LENGTH); 392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 394f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool hasMultiplePrimaryWeights( 395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const RuleBasedCollator &coll, uint32_t variableTop, 396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) { 397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces.removeAllElements(); 398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll.internalGetCEs(s, ces, errorCode); 399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(errorCode)) { return FALSE; } 400f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool seenPrimary = FALSE; 401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for (int32_t i = 0; i < ces.size(); ++i) { 402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = ces.elementAti(i); 403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p = (uint32_t)(ce >> 32); 404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (p > variableTop) { 405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // not primary ignorable 406f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (seenPrimary) { 407f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return TRUE; 408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 409f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius seenPrimary = TRUE; 410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 411f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 412f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 415f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 417f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusBucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { 418f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Initialize indexCharacters. 419f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector indexCharacters(errorCode); 420f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.setDeleter(uprv_deleteUObject); 421f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initLabels(indexCharacters, errorCode); 422f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 423f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 424f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Variables for hasMultiplePrimaryWeights(). 425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector64 ces(errorCode); 426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t variableTop; 427f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) { 428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius variableTop = collatorPrimaryOnly_->getVariableTop(errorCode); 429f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 430f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius variableTop = 0; 431f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 432f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasInvisibleBuckets = FALSE; 433f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 434f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Helper arrays for Chinese Pinyin collation. 435f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *asciiBuckets[26] = { 436f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 437f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL 438f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius }; 439f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *pinyinBuckets[26] = { 440f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 441f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL 442f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius }; 443f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasPinyin = FALSE; 444f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 4451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode); 4461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(errorCode)) { 447f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 448f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 449f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->setDeleter(uprv_deleteUObject); 450f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 451f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // underflow bucket 452f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW); 453f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 454f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 455f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 456f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 457f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 458f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 459f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 460f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString temp; 461f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 462f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // fix up the list, adding underflow, additions, overflow 463f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Insert inflow labels as needed. 464f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t scriptIndex = -1; 465f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *scriptUpperBoundary = &emptyString_; 466f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < indexCharacters.size(); ++i) { 467f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString ¤t = *getString(indexCharacters, i); 468f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) { 469f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We crossed the script boundary into a new script. 470f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &inflowBoundary = *scriptUpperBoundary; 471f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool skippedScript = FALSE; 472f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 473f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex); 474f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) { 475f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 476f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 477f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius skippedScript = TRUE; 478f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 479f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (skippedScript && bucketList->size() > 1) { 480f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We are skipping one or more scripts, 481f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and we are not just getting out of the underflow label. 482f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW); 483f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 484f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 485f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 486f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 487f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 488f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 489f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 490f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Add a bucket with the current label. 491f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL); 492f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 493f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 494f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 495f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 496f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 497f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Remember ASCII and Pinyin buckets for Pinyin redirects. 498f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar c; 499f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z 500f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius asciiBuckets[c - 0x41] = bucket; 501f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) && 502f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) { 503f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius pinyinBuckets[c - 0x41] = bucket; 504f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasPinyin = TRUE; 505f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 506f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Check for multiple primary weights. 507f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!current.startsWith(BASE, BASE_LENGTH) && 508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current, 509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces, errorCode) && 510f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) { 511f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // "AE-ligature" or "Sch" etc. 512f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = bucketList->size() - 2;; --i) { 513f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *singleBucket = getBucket(*bucketList, i); 514f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) { 515f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // There is no single-character bucket since the last 516f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // underflow or inflow label. 517f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 518f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 519f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (singleBucket->displayBucket_ == NULL && 520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, 521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius singleBucket->lowerBoundary_, 522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces, errorCode)) { 523f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Add an invisible bucket that redirects strings greater than the expansion 524f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // to the previous single-character bucket. 525f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // For example, after ... Q R S Sch we add Sch\uFFFF->S 526f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S. 527f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(emptyString_, 528f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString(current).append((UChar)0xFFFF), 529f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U_ALPHAINDEX_NORMAL); 530f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 531f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 532f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 533f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 534f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->displayBucket_ = singleBucket; 535f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 536f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasInvisibleBuckets = TRUE; 537f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 538f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 539f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 540f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 541f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 542f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 543f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList->size() == 1) { 544f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // No real labels, show only the underflow label. 545f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); 546f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 547f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 548f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 549f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 550f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 551f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 552f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 553f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // overflow bucket 554f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW); 555f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 556f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 557f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 558f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 559f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); // final 560f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 561f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hasPinyin) { 562f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Redirect Pinyin buckets. 563f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *asciiBucket = NULL; 564f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < 26; ++i) { 565f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (asciiBuckets[i] != NULL) { 566f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius asciiBucket = asciiBuckets[i]; 567f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 568f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (pinyinBuckets[i] != NULL && asciiBucket != NULL) { 569f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius pinyinBuckets[i]->displayBucket_ = asciiBucket; 570f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasInvisibleBuckets = TRUE; 571f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 572f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 573f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 574f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 575f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 576f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!hasInvisibleBuckets) { 577f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); 578f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 579f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 580f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 581f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 582f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 583f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 584f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 585f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Merge inflow buckets that are visually adjacent. 586f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Iterate backwards: Merge inflow into overflow rather than the other way around. 587f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = bucketList->size() - 1; 588f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *nextBucket = getBucket(*bucketList, i); 589f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (--i > 0) { 590f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = getBucket(*bucketList, i); 591f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 592f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius continue; // skip invisible buckets 593f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 594f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) { 595f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) { 596f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->displayBucket_ = nextBucket; 597f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius continue; 598f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 599f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 600f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = bucket; 601f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 602f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 6031b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode); 6041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(errorCode)) { 605f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 606f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 607f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Do not call publicBucketList->setDeleter(): 608f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This vector shares its objects with the bucketList. 609f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < bucketList->size(); ++i) { 610f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = getBucket(*bucketList, i); 611f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ == NULL) { 612f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius publicBucketList->addElement(bucket, errorCode); 613f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 614f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 615f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 616f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias()); 617f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 618f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 619f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 620f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 621f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 622f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius publicBucketList.orphan(); 623f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 624f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 625f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 626f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 627f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Creates an index, and buckets and sorts the list of records into the index. 628f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 629f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initBuckets(UErrorCode &errorCode) { 630f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode) || buckets_ != NULL) { 631f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 632f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 633f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_ = createBucketList(errorCode); 634f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) { 635b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 636b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 637b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 638f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Sort the records by name. 639f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Stable sort preserves input order of collation duplicates. 640f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode); 641f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 642f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Now, we traverse all of the input, which is now sorted. 643f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // If the item doesn't go in the current bucket, we find the next bucket that contains it. 644f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This makes the process order n*log(n), since we just sort the list and then do a linear process. 645f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so 646f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // we need to improve it for that case. 647f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 648f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0); 649f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t bucketIndex = 1; 650f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *nextBucket; 651f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *upperBoundary; 652f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketIndex < buckets_->bucketList_->size()) { 653f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); 654f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = &nextBucket->lowerBoundary_; 655f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 656f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = NULL; 657f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = NULL; 658f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 659f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < inputList_->size(); ++i) { 660f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Record *r = getRecord(*inputList_, i); 661f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // if the current bucket isn't the right one, find the one that is 662f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We have a special flag for the last bucket so that we don't look any further 663f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (upperBoundary != NULL && 664f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) { 665f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket = nextBucket; 666f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // now reset the boundary that we compare against 667f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketIndex < buckets_->bucketList_->size()) { 668f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); 669f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = &nextBucket->lowerBoundary_; 670b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 671f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = NULL; 672f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 673f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 674f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // now put the record into the bucket. 675f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *bucket = currentBucket; 676f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 677f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = bucket->displayBucket_; 678f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 679f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->records_ == NULL) { 680f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->records_ = new UVector(errorCode); 681f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->records_ == NULL) { 682f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 683f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 684b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 685b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 686f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->records_->addElement(r, errorCode); 687b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 688f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 690f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::clearBuckets() { 691f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ != NULL) { 692f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 693f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_ = NULL; 694f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius internalResetBucketIterator(); 695f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 696f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 697f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 698f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::internalResetBucketIterator() { 699f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_ = -1; 700f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket_ = NULL; 701b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 702b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 703b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) { 705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); 706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 707b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 708b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 709b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 710b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet exemplars; 711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); 712b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) { 713f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->addAll(exemplars); 714b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 715b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 716b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR 717b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 718f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The locale data did not include explicit Index characters. 719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Synthesize a set of them from the locale's standard exemplar characters. 720b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); 721b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 722b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 723b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 725f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // question: should we add auxiliary exemplars? 726f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { 727f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.add(0x61, 0x7A); 728f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 729f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables 730f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // cut down to small list 731f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.remove(0xAC00, 0xD7A3). 732f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). 733f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). 734f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). 735f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xD30C).add(0xD558); 736f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 737f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block 738f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // cut down to small list 739f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // make use of the fact that Ethiopic is allocated in 8's, where 740f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // the base is 0 mod 8. 741f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSet ethiopic( 742f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); 743f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator it(ethiopic); 744f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (it.next() && !it.isString()) { 745f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if ((it.getCodepoint() & 0x7) != 0) { 746f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.remove(it.getCodepoint()); 747f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 748f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 749f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 750f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 751b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Upper-case any that aren't already so. 752b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (We only do this for synthesized index characters.) 753b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSetIterator it(exemplars); 754b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString upperC; 755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (it.next()) { 756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UnicodeString &exemplarC = it.getString(); 757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho upperC = exemplarC; 758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius upperC.toUpper(locale); 759f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->add(upperC); 760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 761f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 762b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 763f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) { 764f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSet contractions; 765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode); 766fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; } 767fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius initialLabels_->addAll(contractions); 768f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator iter(contractions); 769f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (iter.next()) { 770f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &s = iter.getString(); 771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT (s.startsWith(BASE, BASE_LENGTH)); 772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = s.charAt(s.length() - 1); 773fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (0x41 <= c && c <= 0x5A) { // A-Z 774fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // There are Pinyin labels, add ASCII A-Z labels as well. 775fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius initialLabels_->add(0x41, 0x5A); // A-Z 776fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 782b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 783b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Return the string with interspersed CGJs. Input must have more than 2 codepoints. 785b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 786f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar CGJ = 0x034F; 787b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUnicodeString AlphabeticIndex::separated(const UnicodeString &item) { 788b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString result; 789b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (item.length() == 0) { 790b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 791b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 792b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i = 0; 793b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (;;) { 794b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar32 cp = item.char32At(i); 795b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.append(cp); 796b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho i = item.moveIndex32(i, 1); 797b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i >= item.length()) { 798b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 799b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 800b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.append(CGJ); 801b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 803b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 804b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 805b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 806b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const { 807b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 809b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 810b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 811b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const { 812b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 814b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 815b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 816b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst RuleBasedCollator &AlphabeticIndex::getCollator() const { 817fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return *collator_; 818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 819b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 821b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getInflowLabel() const { 822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return inflowLabel_; 823b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 824b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 825b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getOverflowLabel() const { 826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return overflowLabel_; 827b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 829b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 830b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getUnderflowLabel() const { 831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return underflowLabel_; 832b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 833b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inflowLabel_ = label; 837f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 839b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 841b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho overflowLabel_ = label; 844f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 846b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 847b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 848b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 849b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho underflowLabel_ = label; 851f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 855b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getMaxLabelCount() const { 857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return maxLabelCount_; 858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) { 862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (maxLabelCount <= 0) { 866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ILLEGAL_ARGUMENT_ERROR; 867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho maxLabelCount_ = maxLabelCount; 870f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 874b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// init() - Common code for constructors. 877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 879f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::init(const Locale *locale, UErrorCode &status) { 880f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 881f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (locale == NULL && collator_ == NULL) { 882f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 883b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 884b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 885f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 886b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho initialLabels_ = new UnicodeSet(); 887f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (initialLabels_ == NULL) { 888f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 889f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 890f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 891b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 892f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inflowLabel_.setTo((UChar)0x2026); // Ellipsis 893b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho overflowLabel_ = inflowLabel_; 894b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho underflowLabel_ = inflowLabel_; 895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 896f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collator_ == NULL) { 897fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collator *coll = Collator::createInstance(*locale, status); 898fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { 899fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete coll; 900fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 901fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 902fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (coll == NULL) { 903f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 904f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 905f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 906fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius collator_ = dynamic_cast<RuleBasedCollator *>(coll); 907fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (collator_ == NULL) { 908fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete coll; 909fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_UNSUPPORTED_ERROR; 910fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 911fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 912f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 913f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone()); 914f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_ == NULL) { 915f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 918f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status); 919f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_ = firstStringsInScript(status); 920f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 921f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status); 922f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Guard against a degenerate collator where 923f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // some script boundary strings are primary ignorable. 924f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 925f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 926f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (firstCharsInScripts_->isEmpty()) { 927f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // AlphabeticIndex requires some non-ignorable script boundary strings. 928f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 929f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 930b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 931f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare( 932f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)), 933f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius emptyString_, status) == UCOL_EQUAL) { 934f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->removeElementAt(0); 935f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 936f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 937b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 938fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 939fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 940fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Chinese index characters, which are specific to each of the several Chinese tailorings, 941fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // take precedence over the single locale data exemplar set per language. 942fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (!addChineseIndexCharacters(status) && locale != NULL) { 943fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addIndexExemplars(*locale, status); 944b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 945b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 947b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 948b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Comparison function for UVector<UnicodeString *> sorting with a collator. 950b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 951b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 952f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right) { 953103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *leftElement = static_cast<const UElement *>(left); 954103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *rightElement = static_cast<const UElement *>(right); 955103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer); 956103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer); 957b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 958b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (leftString == rightString) { 959b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Catches case where both are NULL 960b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 961b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 962b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (leftString == NULL) { 963b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 1; 964b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 965b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (rightString == NULL) { 966b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return -1; 967b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 968f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const Collator *col = static_cast<const Collator *>(context); 969f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 970f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return col->compare(*leftString, *rightString, errorCode); 971b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 972b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 973b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 974b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Comparison function for UVector<Record *> sorting with a collator. 975b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 977b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right) { 978103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *leftElement = static_cast<const UElement *>(left); 979103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *rightElement = static_cast<const UElement *>(right); 980103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer); 981103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer); 982b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const Collator *col = static_cast<const Collator *>(context); 983f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 984f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return col->compare(leftRec->name_, rightRec->name_, errorCode); 985b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 987b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { 988b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 989b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 990b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 9911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert LocalPointer<UVector> dest(new UVector(status), status); 9921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(status)) { 993b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 994b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 995103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius dest->setDeleter(uprv_deleteUObject); 996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Fetch the script-first-primary contractions which are defined in the root collator. 997fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // They all start with U+FDD1. 998fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet set; 999fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status); 1000fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { 1001fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 1002fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1003fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (set.isEmpty()) { 1004fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_UNSUPPORTED_ERROR; 1005fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 1006fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1007fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSetIterator iter(set); 1008fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while (iter.next()) { 1009fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &boundary = iter.getString(); 1010fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1)); 1011fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) { 1012fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Ignore boundaries for the special reordering groups. 1013fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Take only those for "real scripts" (where the sample character is a Letter, 1014fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and the one for unassigned implicit weights (Cn). 1015fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 1016b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1017fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString *s = new UnicodeString(boundary); 1018fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (s == NULL) { 1019b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_MEMORY_ALLOCATION_ERROR; 1020fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 1021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1022fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest->addElement(s, status); 1023fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1024fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return dest.orphan(); 1025b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1027b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1028f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 1029b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1030b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 1031f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Returns true if one index character string is "better" than the other. 1032f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Shorter NFKD is better, and otherwise NFKD-binary-less-than is 1033f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * better, and otherwise binary-less-than is better. 1034b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 1035f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, 1036f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &one, const UnicodeString &other) { 1037f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This is called with primary-equal strings, but never with one.equals(other). 1038f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode status = U_ZERO_ERROR; 1039f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString n1 = nfkdNormalizer.normalize(one, status); 1040f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString n2 = nfkdNormalizer.normalize(other, status); 1041f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return FALSE; } 1042f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t result = n1.countChar32() - n2.countChar32(); 1043b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (result != 0) { 1044f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return result < 0; 1045b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1046b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result = n1.compareCodePointOrder(n2); 1047b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (result != 0) { 1048f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return result < 0; 1049b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1050f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return one.compareCodePointOrder(other) < 0; 1051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1053f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 1054b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1055b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1056b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Constructor & Destructor for AlphabeticIndex::Record 1057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1058b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Records are internal only, instances are not directly surfaced in the public API. 1059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// This class is mostly struct-like, with all public fields. 1060b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1061f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::Record::Record(const UnicodeString &name, const void *data) 1062f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : name_(name), data_(data) {} 1063f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 1064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Record::~Record() { 1065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1066b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1067b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1068b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) { 1069b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1070b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1071b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1072f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (inputList_ == NULL) { 1073f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_ = new UVector(status); 1074f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (inputList_ == NULL) { 1075f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1076f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return *this; 1077f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1078f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->setDeleter(alphaIndex_deleteRecord); 1079f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1080f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Record *r = new Record(name, data); 1081f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (r == NULL) { 1082f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1083f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return *this; 1084f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1085f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->addElement(r, status); 1086f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 1087b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::string ss; 1088b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::string ss2; 1089b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << 1090b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl; 1091b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1092b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1093b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1094b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1095b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) { 1096f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) { 1097f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->removeAllElements(); 1098f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 1099b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) { 1104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 1105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status); 1109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex() const { 1113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return labelsIterIndex_; 1114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextBucket(UErrorCode &status) { 1118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ == NULL && currentBucket_ != NULL) { 1122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ENUM_OUT_OF_SYNC_ERROR; 1123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 1126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++labelsIterIndex_; 1130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (labelsIterIndex_ >= buckets_->getBucketCount()) { 1131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_ = buckets_->getBucketCount(); 1132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1134f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_); 1135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho resetRecordIterator(); 1136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 1137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getBucketLabel() const { 1140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ != NULL) { 1141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->label_; 1142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return emptyString_; 1144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const { 1149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ != NULL) { 1150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->labelType_; 1151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return U_ALPHAINDEX_NORMAL; 1153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketRecordCount() const { 1158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL) { 1159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->records_->size(); 1160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) { 1167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius internalResetBucketIterator(); 1171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextRecord(UErrorCode &status) { 1176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ == NULL) { 1180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We are trying to iterate over the items in a bucket, but there is no 1181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // current bucket from the enumeration of buckets. 1182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_INVALID_STATE_ERROR; 1183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ == NULL) { 1186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ENUM_OUT_OF_SYNC_ERROR; 1187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_->records_ == NULL) { 1190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 1191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++itemsIterIndex_; 1193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (itemsIterIndex_ >= currentBucket_->records_->size()) { 1194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ = currentBucket_->records_->size(); 1195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 1198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getRecordName() const { 1202f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *retStr = &emptyString_; 1203f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL && 1204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ >= 0 && 1205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ < currentBucket_->records_->size()) { 1206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); 1207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retStr = &item->name_; 1208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *retStr; 1210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst void *AlphabeticIndex::getRecordData() const { 1213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const void *retPtr = NULL; 1214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL && 1215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ >= 0 && 1216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ < currentBucket_->records_->size()) { 1217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); 1218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retPtr = item->data_; 1219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return retPtr; 1221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::resetRecordIterator() { 1225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ = -1; 1226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::Bucket(const UnicodeString &label, 1232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UnicodeString &lowerBoundary, 1233f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UAlphabeticIndexLabelType type) 1234f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : label_(label), lowerBoundary_(lowerBoundary), labelType_(type), 1235f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius displayBucket_(NULL), displayIndex_(-1), 1236f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius records_(NULL) { 1237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::~Bucket() { 1241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete records_; 1242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 1245103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 1246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 1247