1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 3f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* Copyright (C) 2009-2013, International Business Machines Corporation and 4f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 10103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION 11103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/alphaindex.h" 13f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/coleitr.h" 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/coll.h" 15f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/localpointer.h" 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/normalizer2.h" 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/tblcoll.h" 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ulocdata.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uniset.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h" 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/usetiter.h" 22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 24f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "cmemory.h" 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cstring.h" 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h" 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uvector.h" 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//#include <string> 30103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//#include <iostream> 31f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 32f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 33f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 36f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 38f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 39f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Prefix string for Chinese index buckets. 40f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes 41f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 42f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UChar BASE[1] = { 0xFDD0 }; 43f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst int32_t BASE_LENGTH = 1; 44f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 45f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, 46f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &one, const UnicodeString &other); 47f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 48f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 51f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right); 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right); 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// UVector<Record *> support function, delete a Record. 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void U_CALLCONV 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoalphaIndex_deleteRecord(void *obj) { 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete static_cast<AlphabeticIndex::Record *>(obj); 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 62f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 64f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned, 65f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode &errorCode) { 66f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 67f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (owned.isValid()) { 68f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return owned.orphan(); 69f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 70f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString *p = new UnicodeString(s); 71f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (p == NULL) { 72f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 73f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 74f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return p; 75f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 77f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline UnicodeString *getString(const UVector &list, int32_t i) { 78f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<UnicodeString *>(list[i]); 79f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 81f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) { 82f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<AlphabeticIndex::Bucket *>(list[i]); 83f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 85f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) { 86f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<AlphabeticIndex::Record *>(list[i]); 87f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 88f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 89f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 90f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Like Java Collections.binarySearch(List, String, Comparator). 91f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 92f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * @return the index>=0 where the item was found, 93f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * or the index<0 for inserting the string at ~index in sorted order 94f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 95f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) { 96f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (list.size() == 0) { return ~0; } 97f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t start = 0; 98f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t limit = list.size(); 99f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 100f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = (start + limit) / 2; 101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i)); 102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UCollationResult cmp = coll.compare(s, *si, errorCode); 104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (cmp == UCOL_EQUAL) { 105f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return i; 106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (cmp < 0) { 107f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (i == start) { 108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return ~start; // insert s before *si 109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 110f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius limit = i; 111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 112f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (i == start) { 113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return ~(start + 1); // insert s after *si 114f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 115f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius start = i; 116f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 120f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// The BucketList is not in the anonymous namespace because only Clang 123f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// seems to support its use in other classes from there. 124f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// However, we also don't need U_I18N_API because it is not used from outside the i18n library. 125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusclass BucketList : public UObject { 126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliuspublic: 127f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList(UVector *bucketList, UVector *publicBucketList) 128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : bucketList_(bucketList), immutableVisibleList_(publicBucketList) { 129f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t displayIndex = 0; 130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < publicBucketList->size(); ++i) { 131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++; 132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 133f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 135f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius virtual ~BucketList() { 136f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete bucketList_; 137f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immutableVisibleList_ != bucketList_) { 138f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete immutableVisibleList_; 139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 142f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t getBucketCount() const { 143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return immutableVisibleList_->size(); 144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 145f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 146f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly, 147f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode &errorCode) { 148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // binary search 149f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t start = 0; 150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t limit = bucketList_->size(); 151f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while ((start + 1) < limit) { 152f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = (start + limit) / 2; 153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i); 154f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UCollationResult nameVsBucket = 155f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode); 156f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (nameVsBucket < 0) { 157f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius limit = i; 158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 159f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius start = i; 160f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start); 163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 164f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = bucket->displayBucket_; 165f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 166f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bucket->displayIndex_; 167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 168f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 169f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius /** All of the buckets, visible and invisible. */ 170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector *bucketList_; 171f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius /** Just the visible buckets. */ 172f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector *immutableVisibleList_; 173f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}; 174f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 175f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::~ImmutableIndex() { 176f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 177f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete collatorPrimaryOnly_; 178f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 179f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t 181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketCount() const { 182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketCount(); 183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t 186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketIndex( 187f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &name, UErrorCode &errorCode) const { 188f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode); 189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst AlphabeticIndex::Bucket * 192f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const { 193f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0 <= index && index < buckets_->getBucketCount()) { 194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return icu::getBucket(*buckets_->immutableVisibleList_, index); 195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 200f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status) 201f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : inputList_(NULL), 202f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL), 203f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius maxLabelCount_(99), 204f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_(NULL), firstCharsInScripts_(NULL), 205f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_(NULL), collatorPrimaryOnly_(NULL), 206f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_(NULL) { 207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius init(&locale, status); 208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 209f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 210f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 211f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status) 212f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : inputList_(NULL), 213f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL), 214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius maxLabelCount_(99), 215f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_(NULL), firstCharsInScripts_(NULL), 216f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_(collator), collatorPrimaryOnly_(NULL), 217f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_(NULL) { 218f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius init(NULL, status); 219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 220f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::~AlphabeticIndex() { 224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete collator_; 225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete collatorPrimaryOnly_; 226f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete firstCharsInScripts_; 227f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 228f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete inputList_; 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete initialLabels_; 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) { 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho initialLabels_->addAll(additions); 238f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) { 244f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius addIndexExemplars(locale, status); 245f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 250f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) { 251f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 252f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // In C++, the ImmutableIndex must own its copy of the BucketList, 253f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // even if it contains no records, for proper memory management. 254f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We could clone the buckets_ if they are not NULL, 255f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // but that would be worth it only if this method is called multiple times, 256f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // or called after using the old-style bucket iterator API. 257f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode)); 258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<RuleBasedCollator> coll( 259f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone())); 260f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immutableBucketList.isNull() || coll.isNull()) { 261f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 262f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 263f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 264f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias()); 265f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immIndex == NULL) { 266f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 267f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 268f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 269f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The ImmutableIndex adopted its parameter objects. 270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius immutableBucketList.orphan(); 271f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius coll.orphan(); 272f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return immIndex; 273f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 274f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketCount(UErrorCode &status) { 276f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 280f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketCount(); 281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getRecordCount(UErrorCode &status) { 285f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status) || inputList_ == NULL) { 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 288f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return inputList_->size(); 289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 291f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { 292f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); 293f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return; } 294f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 295f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0); 296f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &overflowBoundary = 297f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1); 298f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 299f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We make a sorted array of elements. 300f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Some of the input may be redundant. 301f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". 302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We filter out those cases. 303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator iter(*initialLabels_); 304f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (iter.next()) { 305f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *item = &iter.getString(); 306f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UnicodeString> ownedItem; 307f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool checkDistinct; 308f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t itemLength = item->length(); 309f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!item->hasMoreChar32Than(0, itemLength, 1)) { 310f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = FALSE; 311f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if(item->charAt(itemLength - 1) == 0x2a && // '*' 312f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius item->charAt(itemLength - 2) != 0x2a) { 313f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Use a label if it is marked with one trailing star, 314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // even if the label string sorts the same when all contractions are suppressed. 315f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1)); 316f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius item = ownedItem.getAlias(); 317f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (item == NULL) { 318f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 319f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 321f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = FALSE; 322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 323f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = TRUE; 324f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 325f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) { 326f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore a primary-ignorable or non-alphabetic index character. 327f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) { 328f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore an index characters that will land in the overflow bucket. 329f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (checkDistinct && 330f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) { 331f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore a multi-code point index character that does not sort distinctly 332f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // from the sequence of its separate characters. 333f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_); 335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (insertionPoint < 0) { 336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.insertElementAt( 337f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode); 338f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 339f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint); 340f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) { 341f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.setElementAt( 342f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedString(*item, ownedItem, errorCode), insertionPoint); 343f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 344f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 347f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return; } 348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 349f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // if the result is still too large, cut down to maxCount elements, by removing every nth element 350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 351f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t size = indexCharacters.size() - 1; 352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (size > maxLabelCount_) { 353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t count = 0; 354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t old = -1; 355f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < indexCharacters.size();) { 356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++count; 357f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t bump = count * maxLabelCount_ / size; 358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (bump == old) { 359f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.removeElementAt(i); 360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho old = bump; 362f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ++i; 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 366f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 368f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 369f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 370f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UnicodeString &fixLabel(const UnicodeString ¤t, UnicodeString &temp) { 371f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!current.startsWith(BASE, BASE_LENGTH)) { 372f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return current; 373f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 374f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar rest = current.charAt(BASE_LENGTH); 375f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0x2800 < rest && rest <= 0x28FF) { // stroke count 376f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t count = rest-0x2800; 377f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.setTo((UChar)(0x30 + count % 10)); 378f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (count >= 10) { 379f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius count /= 10; 380f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.insert(0, (UChar)(0x30 + count % 10)); 381f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (count >= 10) { 382f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius count /= 10; 383f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.insert(0, (UChar)(0x30 + count)); 384f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 385f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 386f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return temp.append((UChar)0x5283); 387f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 388f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return temp.setTo(current, BASE_LENGTH); 389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 391f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool hasMultiplePrimaryWeights( 392f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius CollationElementIterator &cei, int32_t variableTop, 393f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &s, UErrorCode &errorCode) { 394f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius cei.setText(s, errorCode); 395f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool seenPrimary = FALSE; 396f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 397f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t ce32 = cei.next(errorCode); 398f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (ce32 == CollationElementIterator::NULLORDER) { 399f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 400f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 401f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t p = CollationElementIterator::primaryOrder(ce32); 402f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (p > variableTop && (ce32 & 0xc0) != 0xc0) { 403f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // not primary ignorable, and not a continuation CE 404f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (seenPrimary) { 405f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return TRUE; 406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 407f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius seenPrimary = TRUE; 408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 409f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 410f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 413f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 415f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusBucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { 416f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Initialize indexCharacters. 417f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector indexCharacters(errorCode); 418f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.setDeleter(uprv_deleteUObject); 419f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initLabels(indexCharacters, errorCode); 420f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 421f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 422f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Variables for hasMultiplePrimaryWeights(). 423f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<CollationElementIterator> cei( 424f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->createCollationElementIterator(emptyString_)); 425f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (cei.isNull()) { 426f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 427f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 428f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 429f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t variableTop; 430f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) { 431f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius variableTop = CollationElementIterator::primaryOrder( 432f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius (int32_t)collatorPrimaryOnly_->getVariableTop(errorCode)); 433f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 434f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius variableTop = 0; 435f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 436f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasInvisibleBuckets = FALSE; 437f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 438f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Helper arrays for Chinese Pinyin collation. 439f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *asciiBuckets[26] = { 440f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 441f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL 442f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius }; 443f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *pinyinBuckets[26] = { 444f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 445f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL 446f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius }; 447f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasPinyin = FALSE; 448f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 449f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UVector> bucketList(new UVector(errorCode)); 450f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList.isNull()) { 451f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 452f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 453f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 454f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->setDeleter(uprv_deleteUObject); 455f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 456f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // underflow bucket 457f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW); 458f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 459f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 460f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 461f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 462f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 463f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 464f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 465f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString temp; 466f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 467f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // fix up the list, adding underflow, additions, overflow 468f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Insert inflow labels as needed. 469f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t scriptIndex = -1; 470f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *scriptUpperBoundary = &emptyString_; 471f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < indexCharacters.size(); ++i) { 472f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString ¤t = *getString(indexCharacters, i); 473f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) { 474f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We crossed the script boundary into a new script. 475f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &inflowBoundary = *scriptUpperBoundary; 476f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool skippedScript = FALSE; 477f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 478f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex); 479f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) { 480f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 481f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 482f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius skippedScript = TRUE; 483f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 484f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (skippedScript && bucketList->size() > 1) { 485f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We are skipping one or more scripts, 486f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and we are not just getting out of the underflow label. 487f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW); 488f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 489f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 490f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 491f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 492f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 493f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 494f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 495f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Add a bucket with the current label. 496f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL); 497f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 498f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 499f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 500f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 501f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 502f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Remember ASCII and Pinyin buckets for Pinyin redirects. 503f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar c; 504f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z 505f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius asciiBuckets[c - 0x41] = bucket; 506f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) && 507f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) { 508f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius pinyinBuckets[c - 0x41] = bucket; 509f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasPinyin = TRUE; 510f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 511f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Check for multiple primary weights. 512f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!current.startsWith(BASE, BASE_LENGTH) && 513f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasMultiplePrimaryWeights(*cei, variableTop, current, errorCode) && 514f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) { 515f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // "AE-ligature" or "Sch" etc. 516f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = bucketList->size() - 2;; --i) { 517f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *singleBucket = getBucket(*bucketList, i); 518f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) { 519f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // There is no single-character bucket since the last 520f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // underflow or inflow label. 521f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 522f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 523f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (singleBucket->displayBucket_ == NULL && 524f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius !hasMultiplePrimaryWeights( 525f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *cei, variableTop, singleBucket->lowerBoundary_, errorCode)) { 526f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Add an invisible bucket that redirects strings greater than the expansion 527f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // to the previous single-character bucket. 528f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // For example, after ... Q R S Sch we add Sch\uFFFF->S 529f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S. 530f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(emptyString_, 531f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString(current).append((UChar)0xFFFF), 532f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U_ALPHAINDEX_NORMAL); 533f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 534f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 535f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 536f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 537f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->displayBucket_ = singleBucket; 538f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 539f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasInvisibleBuckets = TRUE; 540f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 541f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 542f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 543f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 544f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 545f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 546f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList->size() == 1) { 547f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // No real labels, show only the underflow label. 548f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); 549f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 550f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 551f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 552f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 553f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 554f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 555f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 556f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // overflow bucket 557f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW); 558f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 559f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 560f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 561f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 562f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); // final 563f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 564f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hasPinyin) { 565f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Redirect Pinyin buckets. 566f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *asciiBucket = NULL; 567f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < 26; ++i) { 568f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (asciiBuckets[i] != NULL) { 569f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius asciiBucket = asciiBuckets[i]; 570f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 571f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (pinyinBuckets[i] != NULL && asciiBucket != NULL) { 572f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius pinyinBuckets[i]->displayBucket_ = asciiBucket; 573f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasInvisibleBuckets = TRUE; 574f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 575f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 576f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 577f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 578f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 579f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!hasInvisibleBuckets) { 580f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); 581f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 582f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 583f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 584f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 585f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 586f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 587f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 588f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Merge inflow buckets that are visually adjacent. 589f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Iterate backwards: Merge inflow into overflow rather than the other way around. 590f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = bucketList->size() - 1; 591f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *nextBucket = getBucket(*bucketList, i); 592f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (--i > 0) { 593f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = getBucket(*bucketList, i); 594f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 595f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius continue; // skip invisible buckets 596f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 597f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) { 598f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) { 599f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->displayBucket_ = nextBucket; 600f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius continue; 601f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 602f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 603f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = bucket; 604f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 605f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 606f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UVector> publicBucketList(new UVector(errorCode)); 607f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList.isNull()) { 608f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 609f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 610f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 611f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Do not call publicBucketList->setDeleter(): 612f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This vector shares its objects with the bucketList. 613f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < bucketList->size(); ++i) { 614f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = getBucket(*bucketList, i); 615f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ == NULL) { 616f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius publicBucketList->addElement(bucket, errorCode); 617f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 618f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 619f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 620f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias()); 621f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 622f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 623f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 624f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 625f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 626f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius publicBucketList.orphan(); 627f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 628f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 629f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 630f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 631f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Creates an index, and buckets and sorts the list of records into the index. 632f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 633f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initBuckets(UErrorCode &errorCode) { 634f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode) || buckets_ != NULL) { 635f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 636f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 637f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_ = createBucketList(errorCode); 638f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) { 639b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 640b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 641b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 642f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Sort the records by name. 643f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Stable sort preserves input order of collation duplicates. 644f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode); 645f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 646f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Now, we traverse all of the input, which is now sorted. 647f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // If the item doesn't go in the current bucket, we find the next bucket that contains it. 648f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This makes the process order n*log(n), since we just sort the list and then do a linear process. 649f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so 650f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // we need to improve it for that case. 651f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 652f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0); 653f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t bucketIndex = 1; 654f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *nextBucket; 655f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *upperBoundary; 656f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketIndex < buckets_->bucketList_->size()) { 657f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); 658f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = &nextBucket->lowerBoundary_; 659f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 660f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = NULL; 661f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = NULL; 662f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 663f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < inputList_->size(); ++i) { 664f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Record *r = getRecord(*inputList_, i); 665f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // if the current bucket isn't the right one, find the one that is 666f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We have a special flag for the last bucket so that we don't look any further 667f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (upperBoundary != NULL && 668f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) { 669f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket = nextBucket; 670f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // now reset the boundary that we compare against 671f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketIndex < buckets_->bucketList_->size()) { 672f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); 673f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = &nextBucket->lowerBoundary_; 674b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 675f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = NULL; 676f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 677f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 678f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // now put the record into the bucket. 679f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *bucket = currentBucket; 680f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 681f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = bucket->displayBucket_; 682f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 683f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->records_ == NULL) { 684f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->records_ = new UVector(errorCode); 685f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->records_ == NULL) { 686f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 687f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 688b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 690f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->records_->addElement(r, errorCode); 691b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 692f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 693b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 694f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::clearBuckets() { 695f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ != NULL) { 696f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 697f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_ = NULL; 698f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius internalResetBucketIterator(); 699f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 700f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 701f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 702f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::internalResetBucketIterator() { 703f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_ = -1; 704f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket_ = NULL; 705b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 706b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 707b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 708f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) { 709f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 710f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Chinese index characters, which are specific to each of the several Chinese tailorings, 711f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // take precedence over the single locale data exemplar set per language. 712f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const char *language = locale.getLanguage(); 713f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 || 714f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius uprv_strcmp(language, "ko") == 0) { 715f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // TODO: This should be done regardless of the language, but it's expensive. 716f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We should add a Collator function (can be @internal) 717f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // to enumerate just the contractions that start with a given code point or string. 718f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (addChineseIndexCharacters(status) || U_FAILURE(status)) { 719f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 720f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 721f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 722f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 723f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status)); 724b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 725b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 726b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 727b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 728b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet exemplars; 729b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); 730b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) { 731f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->addAll(exemplars); 732b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 733b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 734b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR 735b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 736f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The locale data did not include explicit Index characters. 737b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Synthesize a set of them from the locale's standard exemplar characters. 738b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); 739b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 740b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 741b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 742b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 743f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // question: should we add auxiliary exemplars? 744f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { 745f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.add(0x61, 0x7A); 746f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 747f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables 748f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // cut down to small list 749f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.remove(0xAC00, 0xD7A3). 750f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). 751f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). 752f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). 753f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xD30C).add(0xD558); 754f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 755f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block 756f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // cut down to small list 757f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // make use of the fact that Ethiopic is allocated in 8's, where 758f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // the base is 0 mod 8. 759f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSet ethiopic( 760f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); 761f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator it(ethiopic); 762f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (it.next() && !it.isString()) { 763f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if ((it.getCodepoint() & 0x7) != 0) { 764f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.remove(it.getCodepoint()); 765f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 766f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 767f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 768f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 769b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Upper-case any that aren't already so. 770b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (We only do this for synthesized index characters.) 771b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSetIterator it(exemplars); 772b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString upperC; 773b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (it.next()) { 774b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UnicodeString &exemplarC = it.getString(); 775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho upperC = exemplarC; 776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho upperC.toUpper(locale); 777f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->add(upperC); 778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 779f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 781f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) { 782f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSet contractions; 783f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ucol_getContractionsAndExpansions(collatorPrimaryOnly_->getUCollator(), 784f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius contractions.toUSet(), NULL, FALSE, &errorCode); 785f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return FALSE; } 786f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString firstHanBoundary; 787f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasPinyin = FALSE; 788f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator iter(contractions); 789f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (iter.next()) { 790f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &s = iter.getString(); 791f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (s.startsWith(BASE, BASE_LENGTH)) { 792f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->add(s); 793f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (firstHanBoundary.isEmpty() || 794f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(s, firstHanBoundary, errorCode) < 0) { 795f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstHanBoundary = s; 796f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 797f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar c = s.charAt(s.length() - 1); 798f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0x41 <= c && c <= 0x5A) { // A-Z 799f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasPinyin = TRUE; 800f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 801f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 802b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 803f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hasPinyin) { 804f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->add(0x41, 0x5A); // A-Z 805f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 806f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!firstHanBoundary.isEmpty()) { 807f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The hardcoded list of script boundaries includes U+4E00 808f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // which is tailored to not be the first primary 809f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // in all Chinese tailorings except "unihan". 810f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Replace U+4E00 with the first boundary string from the tailoring. 811f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // TODO: This becomes obsolete when the root collator gets 812f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // reliable script-first-primary mappings. 813f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t hanIndex = binarySearch( 814f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *firstCharsInScripts_, UnicodeString((UChar)0x4E00), *collatorPrimaryOnly_); 815f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hanIndex >= 0) { 816f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString *fh = new UnicodeString(firstHanBoundary); 817f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (fh == NULL) { 818f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 819f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 820b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 821f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->setElementAt(fh, hanIndex); 822b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 823f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return TRUE; 824f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 825f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 827b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 829b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 830b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 831b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Return the string with interspersed CGJs. Input must have more than 2 codepoints. 832b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 833f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar CGJ = 0x034F; 834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUnicodeString AlphabeticIndex::separated(const UnicodeString &item) { 835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString result; 836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (item.length() == 0) { 837b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 839b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i = 0; 840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (;;) { 841b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar32 cp = item.char32At(i); 842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.append(cp); 843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho i = item.moveIndex32(i, 1); 844b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i >= item.length()) { 845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 846b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 847b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.append(CGJ); 848b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 849b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 851b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const { 854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 855b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const { 859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst RuleBasedCollator &AlphabeticIndex::getCollator() const { 864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // There are no known non-RuleBasedCollator collators, and none ever expected. 865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // But, in case that changes, better a null pointer than a wrong type. 866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *dynamic_cast<RuleBasedCollator *>(collator_); 867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 870b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getInflowLabel() const { 871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return inflowLabel_; 872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 874b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getOverflowLabel() const { 875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return overflowLabel_; 876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 879b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getUnderflowLabel() const { 880b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return underflowLabel_; 881b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 882b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 883b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 884b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 885b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inflowLabel_ = label; 886f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 887b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 889b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 890b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 891b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 892b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho overflowLabel_ = label; 893f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 894b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 896b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 897b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 898b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 899b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho underflowLabel_ = label; 900f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 901b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 902b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 903b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 904b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 905b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getMaxLabelCount() const { 906b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return maxLabelCount_; 907b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 908b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 909b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 910b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) { 911b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 912b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 913b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 914b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (maxLabelCount <= 0) { 915b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ILLEGAL_ARGUMENT_ERROR; 916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 918b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho maxLabelCount_ = maxLabelCount; 919f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 922b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 923b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 924b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 925b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// init() - Common code for constructors. 926b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 927b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 928f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::init(const Locale *locale, UErrorCode &status) { 929f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 930f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (locale == NULL && collator_ == NULL) { 931f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 932b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 933b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 934f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 935b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho initialLabels_ = new UnicodeSet(); 936f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (initialLabels_ == NULL) { 937f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 938f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 939f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 940b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 941f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inflowLabel_.setTo((UChar)0x2026); // Ellipsis 942b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho overflowLabel_ = inflowLabel_; 943b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho underflowLabel_ = inflowLabel_; 944b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 945f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collator_ == NULL) { 946f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_ = static_cast<RuleBasedCollator *>(Collator::createInstance(*locale, status)); 947f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 948f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collator_ == NULL) { 949f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 950f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 951f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 952f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 953f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone()); 954f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_ == NULL) { 955f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 956b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 957b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 958f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status); 959f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_ = firstStringsInScript(status); 960f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 961f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status); 962f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString _4E00((UChar)0x4E00); 963f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString _1100((UChar)0x1100); 964f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString _1112((UChar)0x1112); 965f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(_4E00, _1112, status) <= 0 && 966f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(_1100, _4E00, status) <= 0) { 967f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The standard Korean tailoring sorts Hanja (Han characters) 968f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // as secondary differences from Hangul syllables. 969f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This makes U+4E00 not useful as a Han-script boundary. 970f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // TODO: This becomes obsolete when the root collator gets 971f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // reliable script-first-primary mappings. 972f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t hanIndex = binarySearch( 973f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *firstCharsInScripts_, _4E00, *collatorPrimaryOnly_); 974f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hanIndex >= 0) { 975f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->removeElementAt(hanIndex); 976b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 977f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 978f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Guard against a degenerate collator where 979f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // some script boundary strings are primary ignorable. 980f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 981f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 982f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (firstCharsInScripts_->isEmpty()) { 983f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // AlphabeticIndex requires some non-ignorable script boundary strings. 984f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 985f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 986b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 987f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare( 988f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)), 989f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius emptyString_, status) == UCOL_EQUAL) { 990f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->removeElementAt(0); 991f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 992f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 993b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 994b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 995b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 996f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (locale != NULL) { 997f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius addIndexExemplars(*locale, status); 998b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 999b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1000b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1001b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1002b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1003b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Comparison function for UVector<UnicodeString *> sorting with a collator. 1004b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1005b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 1006f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right) { 1007103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *leftElement = static_cast<const UElement *>(left); 1008103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *rightElement = static_cast<const UElement *>(right); 1009103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer); 1010103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer); 1011b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1012b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (leftString == rightString) { 1013b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Catches case where both are NULL 1014b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1015b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1016b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (leftString == NULL) { 1017b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 1; 1018b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 1019b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (rightString == NULL) { 1020b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return -1; 1021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1022f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const Collator *col = static_cast<const Collator *>(context); 1023f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 1024f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return col->compare(*leftString, *rightString, errorCode); 1025b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1027b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1028b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Comparison function for UVector<Record *> sorting with a collator. 1029b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1030b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 1031b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right) { 1032103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *leftElement = static_cast<const UElement *>(left); 1033103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *rightElement = static_cast<const UElement *>(right); 1034103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer); 1035103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer); 1036b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const Collator *col = static_cast<const Collator *>(context); 1037f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 1038f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return col->compare(leftRec->name_, rightRec->name_, errorCode); 1039b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1040b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1041b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1042f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 1043f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * This list contains one character per script that has the 1044f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * lowest primary weight for that script in the root collator. 1045f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * This list will be copied and sorted to account for script reordering. 1046f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 1047f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * <p>TODO: This is fragile. If the first character of a script is tailored 1048f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * so that it does not map to the script's lowest primary weight any more, 1049f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * then the buckets will be off. 1050f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * There are hacks in the code to handle the known CJK tailorings of U+4E00. 1051f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 1052f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * <p>We use "A" not "a" because the en_US_POSIX tailoring sorts A primary-before a. 1053f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 1054f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Keep this in sync with HACK_FIRST_CHARS_IN_SCRIPTS in 1055f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * ICU4J main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java 1056f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 1057f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar HACK_FIRST_CHARS_IN_SCRIPTS[] = { 1058f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x41, 0, 0x03B1, 0, 1059f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x2C81, 0, 0x0430, 0, 0x2C30, 0, 0x10D0, 0, 0x0561, 0, 0x05D0, 0, 0xD802, 0xDD00, 0, 0x0800, 0, 0x0621, 0, 0x0710, 0, 1060f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x0780, 0, 0x07CA, 0, 0x2D30, 0, 0x1200, 0, 0x0950, 0, 0x0985, 0, 0x0A74, 0, 0x0AD0, 0, 0x0B05, 0, 0x0BD0, 0, 1061f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x0C05, 0, 0x0C85, 0, 0x0D05, 0, 0x0D85, 0, 1062f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xAAF2, 0, // Meetei Mayek 1063f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xA800, 0, 0xA882, 0, 0xD804, 0xDC83, 0, 1064f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x111C4), U16_TRAIL(0x111C4), 0, // Sharada 1065f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x11680), U16_TRAIL(0x11680), 0, // Takri 1066f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x1B83, 0, 1067f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD802, 0xDE00, 0, 0x0E01, 0, 1068f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x0EDE, 0, // Lao 1069f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xAA80, 0, 0x0F40, 0, 0x1C00, 0, 0xA840, 0, 0x1900, 0, 0x1700, 0, 0x1720, 0, 1070f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x1740, 0, 0x1760, 0, 0x1A00, 0, 0xA930, 0, 0xA90A, 0, 0x1000, 0, 1071f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x11103), U16_TRAIL(0x11103), 0, // Chakma 1072f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x1780, 0, 0x1950, 0, 0x1980, 0, 0x1A20, 0, 1073f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xAA00, 0, 0x1B05, 0, 0xA984, 0, 0x1880, 0, 0x1C5A, 0, 0x13A0, 0, 0x1401, 0, 0x1681, 0, 0x16A0, 0, 0xD803, 0xDC00, 0, 1074f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xA500, 0, 0xA6A0, 0, 0x1100, 0, 0x3041, 0, 0x30A1, 0, 0x3105, 0, 0xA000, 0, 0xA4F8, 0, 1075f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x16F00), U16_TRAIL(0x16F00), 0, // Miao 1076f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD800, 0xDE80, 0, 1077f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD800, 0xDEA0, 0, 0xD802, 0xDD20, 0, 0xD800, 0xDF00, 0, 0xD800, 0xDF30, 0, 0xD801, 0xDC28, 0, 0xD801, 0xDC50, 0, 1078f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD801, 0xDC80, 0, 1079f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x110D0), U16_TRAIL(0x110D0), 0, // Sora Sompeng 1080f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD800, 0xDC00, 0, 0xD802, 0xDC00, 0, 0xD802, 0xDE60, 0, 0xD802, 0xDF00, 0, 0xD802, 0xDC40, 0, 1081f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD802, 0xDF40, 0, 0xD802, 0xDF60, 0, 0xD800, 0xDF80, 0, 0xD800, 0xDFA0, 0, 0xD808, 0xDC00, 0, 0xD80C, 0xDC00, 0, 1082f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x109A0), U16_TRAIL(0x109A0), 0, // Meroitic Cursive 1083f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x10980), U16_TRAIL(0x10980), 0, // Meroitic Hieroglyphs 1084f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x4E00, 0, 1085f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // TODO: The overflow bucket's lowerBoundary string should be the 1086f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // first item after the last reordering group in the collator's script order. 1087f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This should normally be the first Unicode code point 1088f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // that is unassigned (U+0378 in Unicode 6.3) and untailored. 1089f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // However, at least up to ICU 51 the Hani reordering group includes 1090f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // unassigned code points, 1091f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and there is no stable string for the start of the trailing-weights range. 1092f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The only known string that sorts "high" is U+FFFF. 1093f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // When ICU separates Hani vs. unassigned reordering groups, we need to fix this, 1094f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and fix relevant test code. 1095f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ideally, FractionalUCA.txt will have a "script first primary" 1096f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // for unassigned code points. 1097f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xFFFF, 0 1098f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}; 1099b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { 1101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 1103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UVector *dest = new UVector(status); 1105103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (dest == NULL) { 1106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 1108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1109103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius dest->setDeleter(uprv_deleteUObject); 1110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UChar *src = HACK_FIRST_CHARS_IN_SCRIPTS; 1111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UChar *limit = src + LENGTHOF(HACK_FIRST_CHARS_IN_SCRIPTS); 1112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 1113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return dest; 1115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString *str = new UnicodeString(src, -1); 1117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (str == NULL) { 1118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_MEMORY_ALLOCATION_ERROR; 1119f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return dest; 1120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius dest->addElement(str, status); 1122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius src += str->length() + 1; 1123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while (src < limit); 1124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return dest; 1125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 1129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 1131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Returns true if one index character string is "better" than the other. 1132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Shorter NFKD is better, and otherwise NFKD-binary-less-than is 1133f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * better, and otherwise binary-less-than is better. 1134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 1135f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, 1136f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &one, const UnicodeString &other) { 1137f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This is called with primary-equal strings, but never with one.equals(other). 1138f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode status = U_ZERO_ERROR; 1139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString n1 = nfkdNormalizer.normalize(one, status); 1140f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString n2 = nfkdNormalizer.normalize(other, status); 1141f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return FALSE; } 1142f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t result = n1.countChar32() - n2.countChar32(); 1143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (result != 0) { 1144f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return result < 0; 1145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result = n1.compareCodePointOrder(n2); 1147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (result != 0) { 1148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return result < 0; 1149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return one.compareCodePointOrder(other) < 0; 1151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 1154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Constructor & Destructor for AlphabeticIndex::Record 1157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Records are internal only, instances are not directly surfaced in the public API. 1159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// This class is mostly struct-like, with all public fields. 1160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::Record::Record(const UnicodeString &name, const void *data) 1162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : name_(name), data_(data) {} 1163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 1164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Record::~Record() { 1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) { 1169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1172f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (inputList_ == NULL) { 1173f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_ = new UVector(status); 1174f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (inputList_ == NULL) { 1175f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1176f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return *this; 1177f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1178f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->setDeleter(alphaIndex_deleteRecord); 1179f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Record *r = new Record(name, data); 1181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (r == NULL) { 1182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return *this; 1184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->addElement(r, status); 1186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 1187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::string ss; 1188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::string ss2; 1189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << 1190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl; 1191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) { 1196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) { 1197f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->removeAllElements(); 1198f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 1199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) { 1204f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 1205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status); 1209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex() const { 1213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return labelsIterIndex_; 1214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextBucket(UErrorCode &status) { 1218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ == NULL && currentBucket_ != NULL) { 1222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ENUM_OUT_OF_SYNC_ERROR; 1223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1225f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 1226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++labelsIterIndex_; 1230f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (labelsIterIndex_ >= buckets_->getBucketCount()) { 1231f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_ = buckets_->getBucketCount(); 1232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1234f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_); 1235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho resetRecordIterator(); 1236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 1237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getBucketLabel() const { 1240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ != NULL) { 1241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->label_; 1242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1243f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return emptyString_; 1244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const { 1249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ != NULL) { 1250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->labelType_; 1251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return U_ALPHAINDEX_NORMAL; 1253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketRecordCount() const { 1258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL) { 1259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->records_->size(); 1260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) { 1267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius internalResetBucketIterator(); 1271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextRecord(UErrorCode &status) { 1276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ == NULL) { 1280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We are trying to iterate over the items in a bucket, but there is no 1281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // current bucket from the enumeration of buckets. 1282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_INVALID_STATE_ERROR; 1283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1285f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ == NULL) { 1286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ENUM_OUT_OF_SYNC_ERROR; 1287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1289f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_->records_ == NULL) { 1290f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 1291f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++itemsIterIndex_; 1293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (itemsIterIndex_ >= currentBucket_->records_->size()) { 1294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ = currentBucket_->records_->size(); 1295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 1298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getRecordName() const { 1302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *retStr = &emptyString_; 1303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL && 1304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ >= 0 && 1305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ < currentBucket_->records_->size()) { 1306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); 1307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retStr = &item->name_; 1308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *retStr; 1310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst void *AlphabeticIndex::getRecordData() const { 1313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const void *retPtr = NULL; 1314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL && 1315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ >= 0 && 1316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ < currentBucket_->records_->size()) { 1317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); 1318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retPtr = item->data_; 1319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return retPtr; 1321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::resetRecordIterator() { 1325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ = -1; 1326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::Bucket(const UnicodeString &label, 1332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UnicodeString &lowerBoundary, 1333f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UAlphabeticIndexLabelType type) 1334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : label_(label), lowerBoundary_(lowerBoundary), labelType_(type), 1335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius displayBucket_(NULL), displayIndex_(-1), 1336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius records_(NULL) { 1337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::~Bucket() { 1341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete records_; 1342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 1345103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 1346103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif 1347