1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2009-2014, International Business Machines Corporation and 4f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 11103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/alphaindex.h" 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/coll.h" 14f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/localpointer.h" 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/normalizer2.h" 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/tblcoll.h" 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uchar.h" 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ulocdata.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uniset.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h" 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/usetiter.h" 22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 24f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "cmemory.h" 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cstring.h" 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h" 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uvector.h" 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h" 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//#include <string> 31103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//#include <iostream> 32f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 33f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 34f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 37f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 39f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 40f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Prefix string for Chinese index buckets. 41f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes 42f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 43f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UChar BASE[1] = { 0xFDD0 }; 44f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst int32_t BASE_LENGTH = 1; 45f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 46f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, 47f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &one, const UnicodeString &other); 48f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 49f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 52f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right); 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right); 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// UVector<Record *> support function, delete a Record. 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void U_CALLCONV 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoalphaIndex_deleteRecord(void *obj) { 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete static_cast<AlphabeticIndex::Record *>(obj); 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 63f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 65f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned, 66f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode &errorCode) { 67f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 68f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (owned.isValid()) { 69f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return owned.orphan(); 70f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 71f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString *p = new UnicodeString(s); 72f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (p == NULL) { 73f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 74f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 75f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return p; 76f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 78f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline UnicodeString *getString(const UVector &list, int32_t i) { 79f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<UnicodeString *>(list[i]); 80f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 82f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) { 83f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<AlphabeticIndex::Bucket *>(list[i]); 84f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 86f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) { 87f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<AlphabeticIndex::Record *>(list[i]); 88f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 89f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 90f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 91f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Like Java Collections.binarySearch(List, String, Comparator). 92f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 93f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * @return the index>=0 where the item was found, 94f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * or the index<0 for inserting the string at ~index in sorted order 95f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 96f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) { 97f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (list.size() == 0) { return ~0; } 98f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t start = 0; 99f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t limit = list.size(); 100f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = (start + limit) / 2; 102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i)); 103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UCollationResult cmp = coll.compare(s, *si, errorCode); 105f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (cmp == UCOL_EQUAL) { 106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return i; 107f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (cmp < 0) { 108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (i == start) { 109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return ~start; // insert s before *si 110f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius limit = i; 112f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (i == start) { 114f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return ~(start + 1); // insert s after *si 115f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 116f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius start = i; 117f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 123f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// The BucketList is not in the anonymous namespace because only Clang 124f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// seems to support its use in other classes from there. 125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// However, we also don't need U_I18N_API because it is not used from outside the i18n library. 126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusclass BucketList : public UObject { 127f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliuspublic: 128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList(UVector *bucketList, UVector *publicBucketList) 129f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : bucketList_(bucketList), immutableVisibleList_(publicBucketList) { 130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t displayIndex = 0; 131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < publicBucketList->size(); ++i) { 132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++; 133f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 134f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // The virtual destructor must not be inline. 1378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // See ticket #8454 for details. 1388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius virtual ~BucketList(); 139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 140f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t getBucketCount() const { 141f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return immutableVisibleList_->size(); 142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 144f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly, 145f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode &errorCode) { 146f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // binary search 147f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t start = 0; 148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t limit = bucketList_->size(); 149f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while ((start + 1) < limit) { 150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = (start + limit) / 2; 151f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i); 152f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UCollationResult nameVsBucket = 153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode); 154f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (nameVsBucket < 0) { 155f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius limit = i; 156f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 157f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius start = i; 158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 159f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 160f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start); 161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = bucket->displayBucket_; 163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 164f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bucket->displayIndex_; 165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 166f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 167f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius /** All of the buckets, visible and invisible. */ 168f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector *bucketList_; 169f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius /** Just the visible buckets. */ 170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector *immutableVisibleList_; 171f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}; 172f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusBucketList::~BucketList() { 1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete bucketList_; 1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (immutableVisibleList_ != bucketList_) { 1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete immutableVisibleList_; 1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::~ImmutableIndex() { 181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete collatorPrimaryOnly_; 183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t 186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketCount() const { 187f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketCount(); 188f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t 191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketIndex( 192f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &name, UErrorCode &errorCode) const { 193f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode); 194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst AlphabeticIndex::Bucket * 197f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const { 198f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0 <= index && index < buckets_->getBucketCount()) { 199f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return icu::getBucket(*buckets_->immutableVisibleList_, index); 200f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 201f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 205f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status) 206f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : inputList_(NULL), 207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL), 208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius maxLabelCount_(99), 209f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_(NULL), firstCharsInScripts_(NULL), 210f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_(NULL), collatorPrimaryOnly_(NULL), 211f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_(NULL) { 212f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius init(&locale, status); 213f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 215f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 216f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status) 217f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : inputList_(NULL), 218f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL), 219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius maxLabelCount_(99), 220f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_(NULL), firstCharsInScripts_(NULL), 221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_(collator), collatorPrimaryOnly_(NULL), 222f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_(NULL) { 223f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius init(NULL, status); 224f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 225f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 226f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::~AlphabeticIndex() { 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete collator_; 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete collatorPrimaryOnly_; 231f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete firstCharsInScripts_; 232f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 233f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete inputList_; 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete initialLabels_; 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) { 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho initialLabels_->addAll(additions); 243f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) { 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addIndexExemplars(locale, status); 250f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 255f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) { 256f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 257f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // In C++, the ImmutableIndex must own its copy of the BucketList, 258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // even if it contains no records, for proper memory management. 259f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We could clone the buckets_ if they are not NULL, 260f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // but that would be worth it only if this method is called multiple times, 261f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // or called after using the old-style bucket iterator API. 262f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode)); 263f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<RuleBasedCollator> coll( 264f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone())); 265f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immutableBucketList.isNull() || coll.isNull()) { 266f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 267f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 268f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 269f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias()); 270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immIndex == NULL) { 271f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 272f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 273f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 274f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The ImmutableIndex adopted its parameter objects. 275f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius immutableBucketList.orphan(); 276f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius coll.orphan(); 277f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return immIndex; 278f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 279f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketCount(UErrorCode &status) { 281f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 285f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketCount(); 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getRecordCount(UErrorCode &status) { 290f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status) || inputList_ == NULL) { 291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 293f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return inputList_->size(); 294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 296f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { 297f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); 298f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return; } 299f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 300f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0); 301f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &overflowBoundary = 302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1); 303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 304f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We make a sorted array of elements. 305f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Some of the input may be redundant. 306f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". 307f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We filter out those cases. 308f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator iter(*initialLabels_); 309f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (iter.next()) { 310f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *item = &iter.getString(); 311f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UnicodeString> ownedItem; 312f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool checkDistinct; 313f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t itemLength = item->length(); 314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!item->hasMoreChar32Than(0, itemLength, 1)) { 315f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = FALSE; 316f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if(item->charAt(itemLength - 1) == 0x2a && // '*' 317f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius item->charAt(itemLength - 2) != 0x2a) { 318f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Use a label if it is marked with one trailing star, 319f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // even if the label string sorts the same when all contractions are suppressed. 320f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1)); 321f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius item = ownedItem.getAlias(); 322f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (item == NULL) { 323f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 324f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 326f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = FALSE; 327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 328f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = TRUE; 329f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 330f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) { 331f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore a primary-ignorable or non-alphabetic index character. 332f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) { 333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Ignore an index character that will land in the overflow bucket. 334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (checkDistinct && 335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) { 336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore a multi-code point index character that does not sort distinctly 337f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // from the sequence of its separate characters. 338f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 339f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_); 340f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (insertionPoint < 0) { 341f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.insertElementAt( 342f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode); 343f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 344f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint); 345f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) { 346f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.setElementAt( 347f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedString(*item, ownedItem, errorCode), insertionPoint); 348f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 349f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 352f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return; } 353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 354f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // if the result is still too large, cut down to maxCount elements, by removing every nth element 355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 356f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t size = indexCharacters.size() - 1; 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (size > maxLabelCount_) { 358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t count = 0; 359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t old = -1; 360f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < indexCharacters.size();) { 361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++count; 362f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t bump = count * maxLabelCount_ / size; 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (bump == old) { 364f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.removeElementAt(i); 365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho old = bump; 367f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ++i; 368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 371f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 373f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 374f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 375f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UnicodeString &fixLabel(const UnicodeString ¤t, UnicodeString &temp) { 376f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!current.startsWith(BASE, BASE_LENGTH)) { 377f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return current; 378f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 379f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar rest = current.charAt(BASE_LENGTH); 380f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0x2800 < rest && rest <= 0x28FF) { // stroke count 381f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t count = rest-0x2800; 382f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.setTo((UChar)(0x30 + count % 10)); 383f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (count >= 10) { 384f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius count /= 10; 385f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.insert(0, (UChar)(0x30 + count % 10)); 386f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (count >= 10) { 387f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius count /= 10; 388f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.insert(0, (UChar)(0x30 + count)); 389f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 390f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 391f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return temp.append((UChar)0x5283); 392f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 393f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return temp.setTo(current, BASE_LENGTH); 394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 396f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool hasMultiplePrimaryWeights( 397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const RuleBasedCollator &coll, uint32_t variableTop, 398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) { 399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces.removeAllElements(); 400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll.internalGetCEs(s, ces, errorCode); 401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(errorCode)) { return FALSE; } 402f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool seenPrimary = FALSE; 403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for (int32_t i = 0; i < ces.size(); ++i) { 404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = ces.elementAti(i); 405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p = (uint32_t)(ce >> 32); 406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (p > variableTop) { 407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // not primary ignorable 408f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (seenPrimary) { 409f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return TRUE; 410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 411f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius seenPrimary = TRUE; 412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 413f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 414f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 417f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 419f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusBucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { 420f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Initialize indexCharacters. 421f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector indexCharacters(errorCode); 422f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.setDeleter(uprv_deleteUObject); 423f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initLabels(indexCharacters, errorCode); 424f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 425f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 426f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Variables for hasMultiplePrimaryWeights(). 427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector64 ces(errorCode); 428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t variableTop; 429f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) { 430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius variableTop = collatorPrimaryOnly_->getVariableTop(errorCode); 431f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 432f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius variableTop = 0; 433f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 434f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasInvisibleBuckets = FALSE; 435f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 436f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Helper arrays for Chinese Pinyin collation. 437f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *asciiBuckets[26] = { 438f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 439f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL 440f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius }; 441f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *pinyinBuckets[26] = { 442f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 443f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL 444f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius }; 445f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasPinyin = FALSE; 446f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 447f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UVector> bucketList(new UVector(errorCode)); 448f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList.isNull()) { 449f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 450f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 451f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 452f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->setDeleter(uprv_deleteUObject); 453f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 454f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // underflow bucket 455f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW); 456f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 457f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 458f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 459f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 460f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 461f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 462f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 463f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString temp; 464f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 465f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // fix up the list, adding underflow, additions, overflow 466f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Insert inflow labels as needed. 467f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t scriptIndex = -1; 468f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *scriptUpperBoundary = &emptyString_; 469f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < indexCharacters.size(); ++i) { 470f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString ¤t = *getString(indexCharacters, i); 471f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) { 472f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We crossed the script boundary into a new script. 473f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &inflowBoundary = *scriptUpperBoundary; 474f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool skippedScript = FALSE; 475f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 476f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex); 477f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) { 478f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 479f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 480f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius skippedScript = TRUE; 481f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 482f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (skippedScript && bucketList->size() > 1) { 483f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We are skipping one or more scripts, 484f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and we are not just getting out of the underflow label. 485f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW); 486f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 487f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 488f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 489f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 490f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 491f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 492f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 493f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Add a bucket with the current label. 494f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL); 495f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 496f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 497f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 498f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 499f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 500f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Remember ASCII and Pinyin buckets for Pinyin redirects. 501f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar c; 502f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z 503f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius asciiBuckets[c - 0x41] = bucket; 504f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) && 505f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) { 506f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius pinyinBuckets[c - 0x41] = bucket; 507f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasPinyin = TRUE; 508f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 509f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Check for multiple primary weights. 510f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!current.startsWith(BASE, BASE_LENGTH) && 511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current, 512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces, errorCode) && 513f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) { 514f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // "AE-ligature" or "Sch" etc. 515f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = bucketList->size() - 2;; --i) { 516f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *singleBucket = getBucket(*bucketList, i); 517f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) { 518f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // There is no single-character bucket since the last 519f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // underflow or inflow label. 520f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 521f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 522f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (singleBucket->displayBucket_ == NULL && 523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, 524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius singleBucket->lowerBoundary_, 525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces, errorCode)) { 526f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Add an invisible bucket that redirects strings greater than the expansion 527f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // to the previous single-character bucket. 528f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // For example, after ... Q R S Sch we add Sch\uFFFF->S 529f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S. 530f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(emptyString_, 531f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString(current).append((UChar)0xFFFF), 532f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U_ALPHAINDEX_NORMAL); 533f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 534f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 535f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 536f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 537f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->displayBucket_ = singleBucket; 538f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 539f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasInvisibleBuckets = TRUE; 540f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 541f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 542f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 543f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 544f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 545f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 546f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList->size() == 1) { 547f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // No real labels, show only the underflow label. 548f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); 549f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 550f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 551f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 552f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 553f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 554f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 555f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 556f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // overflow bucket 557f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW); 558f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 559f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 560f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 561f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 562f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); // final 563f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 564f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hasPinyin) { 565f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Redirect Pinyin buckets. 566f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *asciiBucket = NULL; 567f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < 26; ++i) { 568f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (asciiBuckets[i] != NULL) { 569f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius asciiBucket = asciiBuckets[i]; 570f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 571f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (pinyinBuckets[i] != NULL && asciiBucket != NULL) { 572f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius pinyinBuckets[i]->displayBucket_ = asciiBucket; 573f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasInvisibleBuckets = TRUE; 574f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 575f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 576f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 577f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 578f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 579f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!hasInvisibleBuckets) { 580f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); 581f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 582f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 583f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 584f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 585f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 586f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 587f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 588f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Merge inflow buckets that are visually adjacent. 589f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Iterate backwards: Merge inflow into overflow rather than the other way around. 590f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = bucketList->size() - 1; 591f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *nextBucket = getBucket(*bucketList, i); 592f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (--i > 0) { 593f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = getBucket(*bucketList, i); 594f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 595f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius continue; // skip invisible buckets 596f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 597f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) { 598f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) { 599f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->displayBucket_ = nextBucket; 600f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius continue; 601f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 602f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 603f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = bucket; 604f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 605f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 606f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UVector> publicBucketList(new UVector(errorCode)); 607f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList.isNull()) { 608f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 609f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 610f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 611f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Do not call publicBucketList->setDeleter(): 612f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This vector shares its objects with the bucketList. 613f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < bucketList->size(); ++i) { 614f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = getBucket(*bucketList, i); 615f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ == NULL) { 616f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius publicBucketList->addElement(bucket, errorCode); 617f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 618f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 619f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 620f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias()); 621f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 622f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 623f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 624f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 625f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 626f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius publicBucketList.orphan(); 627f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 628f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 629f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 630f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 631f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Creates an index, and buckets and sorts the list of records into the index. 632f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 633f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initBuckets(UErrorCode &errorCode) { 634f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode) || buckets_ != NULL) { 635f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 636f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 637f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_ = createBucketList(errorCode); 638f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) { 639b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 640b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 641b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 642f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Sort the records by name. 643f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Stable sort preserves input order of collation duplicates. 644f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode); 645f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 646f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Now, we traverse all of the input, which is now sorted. 647f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // If the item doesn't go in the current bucket, we find the next bucket that contains it. 648f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This makes the process order n*log(n), since we just sort the list and then do a linear process. 649f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so 650f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // we need to improve it for that case. 651f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 652f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0); 653f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t bucketIndex = 1; 654f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *nextBucket; 655f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *upperBoundary; 656f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketIndex < buckets_->bucketList_->size()) { 657f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); 658f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = &nextBucket->lowerBoundary_; 659f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 660f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = NULL; 661f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = NULL; 662f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 663f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < inputList_->size(); ++i) { 664f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Record *r = getRecord(*inputList_, i); 665f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // if the current bucket isn't the right one, find the one that is 666f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We have a special flag for the last bucket so that we don't look any further 667f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (upperBoundary != NULL && 668f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) { 669f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket = nextBucket; 670f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // now reset the boundary that we compare against 671f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketIndex < buckets_->bucketList_->size()) { 672f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); 673f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = &nextBucket->lowerBoundary_; 674b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 675f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = NULL; 676f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 677f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 678f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // now put the record into the bucket. 679f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *bucket = currentBucket; 680f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 681f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = bucket->displayBucket_; 682f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 683f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->records_ == NULL) { 684f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->records_ = new UVector(errorCode); 685f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->records_ == NULL) { 686f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 687f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 688b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 690f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->records_->addElement(r, errorCode); 691b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 692f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 693b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 694f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::clearBuckets() { 695f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ != NULL) { 696f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 697f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_ = NULL; 698f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius internalResetBucketIterator(); 699f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 700f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 701f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 702f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::internalResetBucketIterator() { 703f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_ = -1; 704f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket_ = NULL; 705b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 707b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) { 709fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); 710b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 712b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 713b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 714b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet exemplars; 715b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); 716b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) { 717f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->addAll(exemplars); 718b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 720b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR 721b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 722f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The locale data did not include explicit Index characters. 723b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Synthesize a set of them from the locale's standard exemplar characters. 724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); 725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 726b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 727b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 728b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 729f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // question: should we add auxiliary exemplars? 730f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { 731f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.add(0x61, 0x7A); 732f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 733f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables 734f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // cut down to small list 735f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.remove(0xAC00, 0xD7A3). 736f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). 737f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). 738f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). 739f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xD30C).add(0xD558); 740f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 741f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block 742f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // cut down to small list 743f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // make use of the fact that Ethiopic is allocated in 8's, where 744f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // the base is 0 mod 8. 745f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSet ethiopic( 746f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); 747f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator it(ethiopic); 748f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (it.next() && !it.isString()) { 749f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if ((it.getCodepoint() & 0x7) != 0) { 750f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.remove(it.getCodepoint()); 751f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 752f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 753f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 754f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 755b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Upper-case any that aren't already so. 756b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (We only do this for synthesized index characters.) 757b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSetIterator it(exemplars); 758b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString upperC; 759b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (it.next()) { 760b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UnicodeString &exemplarC = it.getString(); 761b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho upperC = exemplarC; 762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius upperC.toUpper(locale); 763f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->add(upperC); 764b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 765f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 766b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 767f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) { 768f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSet contractions; 769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode); 770fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; } 771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius initialLabels_->addAll(contractions); 772f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator iter(contractions); 773f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (iter.next()) { 774f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &s = iter.getString(); 775fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT (s.startsWith(BASE, BASE_LENGTH)); 776fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = s.charAt(s.length() - 1); 777fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (0x41 <= c && c <= 0x5A) { // A-Z 778fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // There are Pinyin labels, add ASCII A-Z labels as well. 779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius initialLabels_->add(0x41, 0x5A); // A-Z 780fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 782b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 783fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 785b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 786b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 787b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 788b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Return the string with interspersed CGJs. Input must have more than 2 codepoints. 789b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 790f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar CGJ = 0x034F; 791b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUnicodeString AlphabeticIndex::separated(const UnicodeString &item) { 792b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString result; 793b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (item.length() == 0) { 794b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 795b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 796b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i = 0; 797b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (;;) { 798b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar32 cp = item.char32At(i); 799b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.append(cp); 800b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho i = item.moveIndex32(i, 1); 801b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i >= item.length()) { 802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 803b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 804b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.append(CGJ); 805b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 806b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 807b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 809b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 810b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const { 811b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 812b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 814b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 815b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const { 816b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 817b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 818b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 819b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst RuleBasedCollator &AlphabeticIndex::getCollator() const { 821fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return *collator_; 822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 823b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 824b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 825b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getInflowLabel() const { 826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return inflowLabel_; 827b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 829b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getOverflowLabel() const { 830b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return overflowLabel_; 831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 832b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 833b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getUnderflowLabel() const { 835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return underflowLabel_; 836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 837b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 839b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inflowLabel_ = label; 841f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 844b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 846b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 847b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho overflowLabel_ = label; 848f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 849b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 851b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho underflowLabel_ = label; 855f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getMaxLabelCount() const { 861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return maxLabelCount_; 862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) { 866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (maxLabelCount <= 0) { 870b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ILLEGAL_ARGUMENT_ERROR; 871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho maxLabelCount_ = maxLabelCount; 874f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 879b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 880b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// init() - Common code for constructors. 881b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 882b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 883f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::init(const Locale *locale, UErrorCode &status) { 884f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 885f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (locale == NULL && collator_ == NULL) { 886f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 887b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 889f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 890b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho initialLabels_ = new UnicodeSet(); 891f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (initialLabels_ == NULL) { 892f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 893f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 894f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 896f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inflowLabel_.setTo((UChar)0x2026); // Ellipsis 897b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho overflowLabel_ = inflowLabel_; 898b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho underflowLabel_ = inflowLabel_; 899b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 900f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collator_ == NULL) { 901fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collator *coll = Collator::createInstance(*locale, status); 902fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { 903fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete coll; 904fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 905fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 906fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (coll == NULL) { 907f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 908f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 909f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 910fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius collator_ = dynamic_cast<RuleBasedCollator *>(coll); 911fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (collator_ == NULL) { 912fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete coll; 913fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_UNSUPPORTED_ERROR; 914fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 915fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 916f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 917f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone()); 918f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_ == NULL) { 919f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 922f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status); 923f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_ = firstStringsInScript(status); 924f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 925f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status); 926f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Guard against a degenerate collator where 927f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // some script boundary strings are primary ignorable. 928f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 929f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 930f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (firstCharsInScripts_->isEmpty()) { 931f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // AlphabeticIndex requires some non-ignorable script boundary strings. 932f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 933f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 934b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 935f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare( 936f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)), 937f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius emptyString_, status) == UCOL_EQUAL) { 938f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->removeElementAt(0); 939f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 940f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 941b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 942fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 943fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 944fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Chinese index characters, which are specific to each of the several Chinese tailorings, 945fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // take precedence over the single locale data exemplar set per language. 946fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (!addChineseIndexCharacters(status) && locale != NULL) { 947fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addIndexExemplars(*locale, status); 948b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 950b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 951b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 952b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 953b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Comparison function for UVector<UnicodeString *> sorting with a collator. 954b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 955b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 956f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right) { 957103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *leftElement = static_cast<const UElement *>(left); 958103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *rightElement = static_cast<const UElement *>(right); 959103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer); 960103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer); 961b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 962b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (leftString == rightString) { 963b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Catches case where both are NULL 964b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 965b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 966b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (leftString == NULL) { 967b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 1; 968b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 969b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (rightString == NULL) { 970b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return -1; 971b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 972f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const Collator *col = static_cast<const Collator *>(context); 973f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 974f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return col->compare(*leftString, *rightString, errorCode); 975b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 977b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 978b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Comparison function for UVector<Record *> sorting with a collator. 979b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 980b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 981b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right) { 982103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *leftElement = static_cast<const UElement *>(left); 983103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *rightElement = static_cast<const UElement *>(right); 984103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer); 985103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer); 986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const Collator *col = static_cast<const Collator *>(context); 987f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 988f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return col->compare(leftRec->name_, rightRec->name_, errorCode); 989b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 990b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 991b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { 992b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 993b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 994b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 995fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalPointer<UVector> dest(new UVector(status)); 996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (dest.isNull()) { 997f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 998b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 999b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1000103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius dest->setDeleter(uprv_deleteUObject); 1001fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Fetch the script-first-primary contractions which are defined in the root collator. 1002fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // They all start with U+FDD1. 1003fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet set; 1004fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status); 1005fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { 1006fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 1007fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1008fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (set.isEmpty()) { 1009fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_UNSUPPORTED_ERROR; 1010fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 1011fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1012fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSetIterator iter(set); 1013fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while (iter.next()) { 1014fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &boundary = iter.getString(); 1015fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1)); 1016fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) { 1017fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Ignore boundaries for the special reordering groups. 1018fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Take only those for "real scripts" (where the sample character is a Letter, 1019fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and the one for unassigned implicit weights (Cn). 1020fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 1021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1022fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString *s = new UnicodeString(boundary); 1023fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (s == NULL) { 1024b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_MEMORY_ALLOCATION_ERROR; 1025fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 1026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1027fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest->addElement(s, status); 1028fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1029fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return dest.orphan(); 1030b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1031b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1032b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1033f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 1034b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1035b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 1036f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Returns true if one index character string is "better" than the other. 1037f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Shorter NFKD is better, and otherwise NFKD-binary-less-than is 1038f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * better, and otherwise binary-less-than is better. 1039b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 1040f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, 1041f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &one, const UnicodeString &other) { 1042f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This is called with primary-equal strings, but never with one.equals(other). 1043f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode status = U_ZERO_ERROR; 1044f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString n1 = nfkdNormalizer.normalize(one, status); 1045f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString n2 = nfkdNormalizer.normalize(other, status); 1046f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return FALSE; } 1047f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t result = n1.countChar32() - n2.countChar32(); 1048b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (result != 0) { 1049f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return result < 0; 1050b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1051b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result = n1.compareCodePointOrder(n2); 1052b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (result != 0) { 1053f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return result < 0; 1054b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1055f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return one.compareCodePointOrder(other) < 0; 1056b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1058f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 1059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1060b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1061b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Constructor & Destructor for AlphabeticIndex::Record 1062b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1063b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Records are internal only, instances are not directly surfaced in the public API. 1064b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// This class is mostly struct-like, with all public fields. 1065b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1066f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::Record::Record(const UnicodeString &name, const void *data) 1067f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : name_(name), data_(data) {} 1068f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 1069b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Record::~Record() { 1070b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1071b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1072b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1073b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) { 1074b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1075b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1076b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1077f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (inputList_ == NULL) { 1078f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_ = new UVector(status); 1079f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (inputList_ == NULL) { 1080f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1081f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return *this; 1082f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1083f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->setDeleter(alphaIndex_deleteRecord); 1084f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1085f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Record *r = new Record(name, data); 1086f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (r == NULL) { 1087f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1088f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return *this; 1089f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1090f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->addElement(r, status); 1091f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 1092b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::string ss; 1093b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::string ss2; 1094b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << 1095b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl; 1096b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1097b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1098b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1099b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) { 1101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) { 1102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->removeAllElements(); 1103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 1104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) { 1109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 1110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status); 1114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex() const { 1118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return labelsIterIndex_; 1119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextBucket(UErrorCode &status) { 1123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ == NULL && currentBucket_ != NULL) { 1127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ENUM_OUT_OF_SYNC_ERROR; 1128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 1131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++labelsIterIndex_; 1135f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (labelsIterIndex_ >= buckets_->getBucketCount()) { 1136f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_ = buckets_->getBucketCount(); 1137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_); 1140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho resetRecordIterator(); 1141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 1142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getBucketLabel() const { 1145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ != NULL) { 1146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->label_; 1147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return emptyString_; 1149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const { 1154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ != NULL) { 1155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->labelType_; 1156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return U_ALPHAINDEX_NORMAL; 1158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketRecordCount() const { 1163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL) { 1164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->records_->size(); 1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) { 1172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1175f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius internalResetBucketIterator(); 1176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextRecord(UErrorCode &status) { 1181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ == NULL) { 1185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We are trying to iterate over the items in a bucket, but there is no 1186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // current bucket from the enumeration of buckets. 1187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_INVALID_STATE_ERROR; 1188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ == NULL) { 1191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ENUM_OUT_OF_SYNC_ERROR; 1192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_->records_ == NULL) { 1195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 1196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++itemsIterIndex_; 1198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (itemsIterIndex_ >= currentBucket_->records_->size()) { 1199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ = currentBucket_->records_->size(); 1200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 1203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getRecordName() const { 1207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *retStr = &emptyString_; 1208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL && 1209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ >= 0 && 1210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ < currentBucket_->records_->size()) { 1211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); 1212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retStr = &item->name_; 1213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *retStr; 1215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst void *AlphabeticIndex::getRecordData() const { 1218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const void *retPtr = NULL; 1219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL && 1220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ >= 0 && 1221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ < currentBucket_->records_->size()) { 1222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); 1223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retPtr = item->data_; 1224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return retPtr; 1226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::resetRecordIterator() { 1230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ = -1; 1231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::Bucket(const UnicodeString &label, 1237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UnicodeString &lowerBoundary, 1238f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UAlphabeticIndexLabelType type) 1239f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : label_(label), lowerBoundary_(lowerBoundary), labelType_(type), 1240f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius displayBucket_(NULL), displayIndex_(-1), 1241f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius records_(NULL) { 1242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::~Bucket() { 1246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete records_; 1247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 1250103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 1251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 1252