1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 3f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* Copyright (C) 2009-2013, International Business Machines Corporation and 4f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius* others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 10103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION 11103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/alphaindex.h" 13f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/coleitr.h" 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/coll.h" 15f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "unicode/localpointer.h" 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/normalizer2.h" 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/tblcoll.h" 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ulocdata.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uniset.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h" 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/usetiter.h" 22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 24f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#include "cmemory.h" 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cstring.h" 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h" 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uvector.h" 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho//#include <string> 30103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius//#include <iostream> 31f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 32f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 33f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 36f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 38f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 39f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Prefix string for Chinese index buckets. 40f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes 41f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 42f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UChar BASE[1] = { 0xFDD0 }; 43f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst int32_t BASE_LENGTH = 1; 44f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 45f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, 46f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &one, const UnicodeString &other); 47f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 48f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 51f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right); 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right); 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// UVector<Record *> support function, delete a Record. 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic void U_CALLCONV 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoalphaIndex_deleteRecord(void *obj) { 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete static_cast<AlphabeticIndex::Record *>(obj); 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 62f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 64f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned, 65f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode &errorCode) { 66f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 67f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (owned.isValid()) { 68f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return owned.orphan(); 69f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 70f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString *p = new UnicodeString(s); 71f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (p == NULL) { 72f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 73f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 74f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return p; 75f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 77f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline UnicodeString *getString(const UVector &list, int32_t i) { 78f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<UnicodeString *>(list[i]); 79f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 81f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) { 82f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<AlphabeticIndex::Bucket *>(list[i]); 83f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 85f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusinline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) { 86f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return static_cast<AlphabeticIndex::Record *>(list[i]); 87f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 88f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 89f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 90f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Like Java Collections.binarySearch(List, String, Comparator). 91f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 92f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * @return the index>=0 where the item was found, 93f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * or the index<0 for inserting the string at ~index in sorted order 94f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 95f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) { 96f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (list.size() == 0) { return ~0; } 97f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t start = 0; 98f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t limit = list.size(); 99f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 100f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = (start + limit) / 2; 101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i)); 102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UCollationResult cmp = coll.compare(s, *si, errorCode); 104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (cmp == UCOL_EQUAL) { 105f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return i; 106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (cmp < 0) { 107f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (i == start) { 108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return ~start; // insert s before *si 109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 110f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius limit = i; 111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 112f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (i == start) { 113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return ~(start + 1); // insert s after *si 114f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 115f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius start = i; 116f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 120f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 121f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 122f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// The BucketList is not in the anonymous namespace because only Clang 123f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// seems to support its use in other classes from there. 124f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius// However, we also don't need U_I18N_API because it is not used from outside the i18n library. 125f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusclass BucketList : public UObject { 126f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliuspublic: 127f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList(UVector *bucketList, UVector *publicBucketList) 128f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : bucketList_(bucketList), immutableVisibleList_(publicBucketList) { 129f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t displayIndex = 0; 130f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < publicBucketList->size(); ++i) { 131f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++; 132f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 133f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // The virtual destructor must not be inline. 1368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // See ticket #8454 for details. 1378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius virtual ~BucketList(); 138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t getBucketCount() const { 140f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return immutableVisibleList_->size(); 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 142f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 143f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly, 144f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode &errorCode) { 145f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // binary search 146f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t start = 0; 147f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t limit = bucketList_->size(); 148f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while ((start + 1) < limit) { 149f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = (start + limit) / 2; 150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i); 151f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UCollationResult nameVsBucket = 152f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode); 153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (nameVsBucket < 0) { 154f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius limit = i; 155f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 156f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius start = i; 157f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 159f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start); 160f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 161f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = bucket->displayBucket_; 162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 163f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bucket->displayIndex_; 164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 165f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 166f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius /** All of the buckets, visible and invisible. */ 167f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector *bucketList_; 168f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius /** Just the visible buckets. */ 169f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector *immutableVisibleList_; 170f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}; 171f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusBucketList::~BucketList() { 1738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete bucketList_; 1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if (immutableVisibleList_ != bucketList_) { 1758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius delete immutableVisibleList_; 1768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 1778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 1788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 179f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::~ImmutableIndex() { 180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete collatorPrimaryOnly_; 182f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 183f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 184f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t 185f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketCount() const { 186f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketCount(); 187f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 188f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 189f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusint32_t 190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucketIndex( 191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &name, UErrorCode &errorCode) const { 192f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode); 193f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst AlphabeticIndex::Bucket * 196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const { 197f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0 <= index && index < buckets_->getBucketCount()) { 198f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return icu::getBucket(*buckets_->immutableVisibleList_, index); 199f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 200f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 204f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status) 205f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : inputList_(NULL), 206f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL), 207f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius maxLabelCount_(99), 208f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_(NULL), firstCharsInScripts_(NULL), 209f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_(NULL), collatorPrimaryOnly_(NULL), 210f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_(NULL) { 211f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius init(&locale, status); 212f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 213f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 215f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status) 216f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : inputList_(NULL), 217f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL), 218f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius maxLabelCount_(99), 219f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_(NULL), firstCharsInScripts_(NULL), 220f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_(collator), collatorPrimaryOnly_(NULL), 221f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_(NULL) { 222f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius init(NULL, status); 223f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 224f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 225f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::~AlphabeticIndex() { 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete collator_; 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete collatorPrimaryOnly_; 230f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete firstCharsInScripts_; 231f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 232f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete inputList_; 233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete initialLabels_; 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) { 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho initialLabels_->addAll(additions); 242f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) { 2480a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius addIndexExemplars(&locale, status); 249f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 254f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) { 255f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 256f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // In C++, the ImmutableIndex must own its copy of the BucketList, 257f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // even if it contains no records, for proper memory management. 258f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We could clone the buckets_ if they are not NULL, 259f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // but that would be worth it only if this method is called multiple times, 260f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // or called after using the old-style bucket iterator API. 261f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode)); 262f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<RuleBasedCollator> coll( 263f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone())); 264f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immutableBucketList.isNull() || coll.isNull()) { 265f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 266f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 267f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 268f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias()); 269f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (immIndex == NULL) { 270f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 271f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 272f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 273f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The ImmutableIndex adopted its parameter objects. 274f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius immutableBucketList.orphan(); 275f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius coll.orphan(); 276f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return immIndex; 277f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 278f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketCount(UErrorCode &status) { 280f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 284f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketCount(); 285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getRecordCount(UErrorCode &status) { 289f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status) || inputList_ == NULL) { 290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 292f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return inputList_->size(); 293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 295f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { 296f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); 297f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return; } 298f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 299f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0); 300f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &overflowBoundary = 301f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1); 302f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We make a sorted array of elements. 304f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Some of the input may be redundant. 305f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". 306f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We filter out those cases. 307f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator iter(*initialLabels_); 308f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (iter.next()) { 309f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *item = &iter.getString(); 310f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UnicodeString> ownedItem; 311f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool checkDistinct; 312f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t itemLength = item->length(); 313f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!item->hasMoreChar32Than(0, itemLength, 1)) { 314f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = FALSE; 315f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if(item->charAt(itemLength - 1) == 0x2a && // '*' 316f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius item->charAt(itemLength - 2) != 0x2a) { 317f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Use a label if it is marked with one trailing star, 318f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // even if the label string sorts the same when all contractions are suppressed. 319f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1)); 320f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius item = ownedItem.getAlias(); 321f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (item == NULL) { 322f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 323f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 325f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = FALSE; 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 327f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius checkDistinct = TRUE; 328f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 329f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) { 330f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore a primary-ignorable or non-alphabetic index character. 331f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) { 332f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore an index characters that will land in the overflow bucket. 333f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (checkDistinct && 334f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) { 335f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ignore a multi-code point index character that does not sort distinctly 336f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // from the sequence of its separate characters. 337f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 338f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_); 339f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (insertionPoint < 0) { 340f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.insertElementAt( 341f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode); 342f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 343f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint); 344f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) { 345f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.setElementAt( 346f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ownedString(*item, ownedItem, errorCode), insertionPoint); 347f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 348f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 351f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return; } 352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 353f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // if the result is still too large, cut down to maxCount elements, by removing every nth element 354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 355f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t size = indexCharacters.size() - 1; 356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (size > maxLabelCount_) { 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t count = 0; 358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t old = -1; 359f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < indexCharacters.size();) { 360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++count; 361f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t bump = count * maxLabelCount_ / size; 362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (bump == old) { 363f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.removeElementAt(i); 364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho old = bump; 366f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ++i; 367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 370f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 372f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 373f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 374f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusconst UnicodeString &fixLabel(const UnicodeString ¤t, UnicodeString &temp) { 375f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!current.startsWith(BASE, BASE_LENGTH)) { 376f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return current; 377f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 378f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar rest = current.charAt(BASE_LENGTH); 379f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0x2800 < rest && rest <= 0x28FF) { // stroke count 380f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t count = rest-0x2800; 381f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.setTo((UChar)(0x30 + count % 10)); 382f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (count >= 10) { 383f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius count /= 10; 384f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.insert(0, (UChar)(0x30 + count % 10)); 385f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (count >= 10) { 386f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius count /= 10; 387f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius temp.insert(0, (UChar)(0x30 + count)); 388f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 389f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 390f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return temp.append((UChar)0x5283); 391f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 392f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return temp.setTo(current, BASE_LENGTH); 393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 395f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool hasMultiplePrimaryWeights( 396f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius CollationElementIterator &cei, int32_t variableTop, 397f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &s, UErrorCode &errorCode) { 398f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius cei.setText(s, errorCode); 399f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool seenPrimary = FALSE; 400f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 401f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t ce32 = cei.next(errorCode); 402f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (ce32 == CollationElementIterator::NULLORDER) { 403f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 404f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 405f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t p = CollationElementIterator::primaryOrder(ce32); 406f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (p > variableTop && (ce32 & 0xc0) != 0xc0) { 407f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // not primary ignorable, and not a continuation CE 408f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (seenPrimary) { 409f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return TRUE; 410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 411f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius seenPrimary = TRUE; 412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 413f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 414f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 417f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 419f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusBucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { 420f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Initialize indexCharacters. 421f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UVector indexCharacters(errorCode); 422f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius indexCharacters.setDeleter(uprv_deleteUObject); 423f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initLabels(indexCharacters, errorCode); 424f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 425f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 426f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Variables for hasMultiplePrimaryWeights(). 427f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<CollationElementIterator> cei( 428f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->createCollationElementIterator(emptyString_)); 429f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (cei.isNull()) { 430f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 431f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 432f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 433f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t variableTop; 434f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) { 435f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius variableTop = CollationElementIterator::primaryOrder( 436f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius (int32_t)collatorPrimaryOnly_->getVariableTop(errorCode)); 437f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 438f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius variableTop = 0; 439f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 440f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasInvisibleBuckets = FALSE; 441f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 442f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Helper arrays for Chinese Pinyin collation. 443f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *asciiBuckets[26] = { 444f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 445f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL 446f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius }; 447f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *pinyinBuckets[26] = { 448f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 449f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL 450f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius }; 451f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasPinyin = FALSE; 452f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 453f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UVector> bucketList(new UVector(errorCode)); 454f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList.isNull()) { 455f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 456f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 457f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 458f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->setDeleter(uprv_deleteUObject); 459f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 460f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // underflow bucket 461f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW); 462f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 463f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 464f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 465f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 466f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 467f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 468f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 469f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString temp; 470f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 471f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // fix up the list, adding underflow, additions, overflow 472f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Insert inflow labels as needed. 473f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t scriptIndex = -1; 474f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *scriptUpperBoundary = &emptyString_; 475f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < indexCharacters.size(); ++i) { 476f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString ¤t = *getString(indexCharacters, i); 477f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) { 478f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We crossed the script boundary into a new script. 479f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &inflowBoundary = *scriptUpperBoundary; 480f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool skippedScript = FALSE; 481f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 482f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex); 483f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) { 484f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 485f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 486f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius skippedScript = TRUE; 487f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 488f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (skippedScript && bucketList->size() > 1) { 489f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We are skipping one or more scripts, 490f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and we are not just getting out of the underflow label. 491f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW); 492f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 493f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 494f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 495f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 496f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 497f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 498f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 499f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Add a bucket with the current label. 500f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL); 501f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 502f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 503f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 504f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 505f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 506f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Remember ASCII and Pinyin buckets for Pinyin redirects. 507f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar c; 508f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z 509f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius asciiBuckets[c - 0x41] = bucket; 510f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) && 511f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) { 512f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius pinyinBuckets[c - 0x41] = bucket; 513f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasPinyin = TRUE; 514f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 515f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Check for multiple primary weights. 516f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!current.startsWith(BASE, BASE_LENGTH) && 517f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasMultiplePrimaryWeights(*cei, variableTop, current, errorCode) && 518f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) { 519f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // "AE-ligature" or "Sch" etc. 520f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = bucketList->size() - 2;; --i) { 521f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *singleBucket = getBucket(*bucketList, i); 522f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) { 523f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // There is no single-character bucket since the last 524f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // underflow or inflow label. 525f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 526f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 527f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (singleBucket->displayBucket_ == NULL && 528f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius !hasMultiplePrimaryWeights( 529f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *cei, variableTop, singleBucket->lowerBoundary_, errorCode)) { 530f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Add an invisible bucket that redirects strings greater than the expansion 531f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // to the previous single-character bucket. 532f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // For example, after ... Q R S Sch we add Sch\uFFFF->S 533f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S. 534f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(emptyString_, 535f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString(current).append((UChar)0xFFFF), 536f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U_ALPHAINDEX_NORMAL); 537f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 538f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 539f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 540f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 541f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->displayBucket_ = singleBucket; 542f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); 543f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasInvisibleBuckets = TRUE; 544f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 545f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 546f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 547f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 548f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 549f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 550f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList->size() == 1) { 551f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // No real labels, show only the underflow label. 552f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); 553f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 554f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 555f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 556f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 557f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 558f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 559f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 560f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // overflow bucket 561f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW); 562f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket == NULL) { 563f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 564f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 565f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 566f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList->addElement(bucket, errorCode); // final 567f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 568f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hasPinyin) { 569f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Redirect Pinyin buckets. 570f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *asciiBucket = NULL; 571f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < 26; ++i) { 572f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (asciiBuckets[i] != NULL) { 573f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius asciiBucket = asciiBuckets[i]; 574f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 575f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (pinyinBuckets[i] != NULL && asciiBucket != NULL) { 576f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius pinyinBuckets[i]->displayBucket_ = asciiBucket; 577f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasInvisibleBuckets = TRUE; 578f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 579f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 580f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 581f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 582f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 583f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!hasInvisibleBuckets) { 584f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias()); 585f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 586f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 587f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 588f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 589f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 590f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 591f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 592f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Merge inflow buckets that are visually adjacent. 593f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Iterate backwards: Merge inflow into overflow rather than the other way around. 594f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t i = bucketList->size() - 1; 595f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *nextBucket = getBucket(*bucketList, i); 596f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (--i > 0) { 597f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = getBucket(*bucketList, i); 598f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 599f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius continue; // skip invisible buckets 600f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 601f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) { 602f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) { 603f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->displayBucket_ = nextBucket; 604f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius continue; 605f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 606f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 607f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = bucket; 608f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 609f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 610f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius LocalPointer<UVector> publicBucketList(new UVector(errorCode)); 611f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketList.isNull()) { 612f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 613f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 614f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 615f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Do not call publicBucketList->setDeleter(): 616f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This vector shares its objects with the bucketList. 617f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < bucketList->size(); ++i) { 618f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = getBucket(*bucketList, i); 619f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ == NULL) { 620f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius publicBucketList->addElement(bucket, errorCode); 621f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 622f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 623f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return NULL; } 624f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias()); 625f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bl == NULL) { 626f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 627f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return NULL; 628f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 629f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucketList.orphan(); 630f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius publicBucketList.orphan(); 631f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return bl; 632f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 633f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 634f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 635f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Creates an index, and buckets and sorts the list of records into the index. 636f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 637f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::initBuckets(UErrorCode &errorCode) { 638f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode) || buckets_ != NULL) { 639f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 640f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 641f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_ = createBucketList(errorCode); 642f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) { 643b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 644b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 645b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 646f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Sort the records by name. 647f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Stable sort preserves input order of collation duplicates. 648f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode); 649f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 650f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Now, we traverse all of the input, which is now sorted. 651f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // If the item doesn't go in the current bucket, we find the next bucket that contains it. 652f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This makes the process order n*log(n), since we just sort the list and then do a linear process. 653f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so 654f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // we need to improve it for that case. 655f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 656f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0); 657f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t bucketIndex = 1; 658f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *nextBucket; 659f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *upperBoundary; 660f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketIndex < buckets_->bucketList_->size()) { 661f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); 662f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = &nextBucket->lowerBoundary_; 663f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 664f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = NULL; 665f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = NULL; 666f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 667f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (int32_t i = 0; i < inputList_->size(); ++i) { 668f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Record *r = getRecord(*inputList_, i); 669f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // if the current bucket isn't the right one, find the one that is 670f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We have a special flag for the last bucket so that we don't look any further 671f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (upperBoundary != NULL && 672f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) { 673f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket = nextBucket; 674f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // now reset the boundary that we compare against 675f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucketIndex < buckets_->bucketList_->size()) { 676f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++); 677f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = &nextBucket->lowerBoundary_; 678b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 679f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius upperBoundary = NULL; 680f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 681f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 682f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // now put the record into the bucket. 683f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Bucket *bucket = currentBucket; 684f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->displayBucket_ != NULL) { 685f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket = bucket->displayBucket_; 686f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 687f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->records_ == NULL) { 688f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->records_ = new UVector(errorCode); 689f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (bucket->records_ == NULL) { 690f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 691f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 692b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 693b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 694f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius bucket->records_->addElement(r, errorCode); 695b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 696f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 697b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 698f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::clearBuckets() { 699f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ != NULL) { 700f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius delete buckets_; 701f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius buckets_ = NULL; 702f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius internalResetBucketIterator(); 703f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 704f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 705f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 706f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::internalResetBucketIterator() { 707f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_ = -1; 708f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket_ = NULL; 709b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 710b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 711b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 7120a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Corneliusvoid AlphabeticIndex::addIndexExemplars(const Locale *locale, UErrorCode &status) { 713f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 714f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Chinese index characters, which are specific to each of the several Chinese tailorings, 715f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // take precedence over the single locale data exemplar set per language. 7160a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius const char *language = locale == NULL ? NULL : locale->getLanguage(); 7170a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius if (language == NULL || 7180a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 || 719f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius uprv_strcmp(language, "ko") == 0) { 720f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // TODO: This should be done regardless of the language, but it's expensive. 721f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // We should add a Collator function (can be @internal) 722f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // to enumerate just the contractions that start with a given code point or string. 723f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (addChineseIndexCharacters(status) || U_FAILURE(status)) { 724f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 725f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 726f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 7270a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius if (locale == NULL) { return; } 728f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 7290a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius LocalULocaleDataPointer uld(ulocdata_open(locale->getName(), &status)); 730b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 731b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 732b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 733b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 734b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet exemplars; 735b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status); 736b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_SUCCESS(status)) { 737f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->addAll(exemplars); 738b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 739b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 740b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR 741b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 742f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The locale data did not include explicit Index characters. 743b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Synthesize a set of them from the locale's standard exemplar characters. 744b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status); 745b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 746b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 747b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 748b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 749f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // question: should we add auxiliary exemplars? 750f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) { 751f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.add(0x61, 0x7A); 752f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 753f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables 754f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // cut down to small list 755f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.remove(0xAC00, 0xD7A3). 756f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C). 757f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544). 758f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0). 759f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius add(0xD30C).add(0xD558); 760f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 761f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block 762f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // cut down to small list 763f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // make use of the fact that Ethiopic is allocated in 8's, where 764f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // the base is 0 mod 8. 765f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSet ethiopic( 766f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status); 767f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator it(ethiopic); 768f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (it.next() && !it.isString()) { 769f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if ((it.getCodepoint() & 0x7) != 0) { 770f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius exemplars.remove(it.getCodepoint()); 771f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 772f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 773f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 774f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 775b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Upper-case any that aren't already so. 776b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (We only do this for synthesized index characters.) 777b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSetIterator it(exemplars); 778b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString upperC; 779b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while (it.next()) { 780b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UnicodeString &exemplarC = it.getString(); 781b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho upperC = exemplarC; 7820a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius upperC.toUpper(*locale); 783f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->add(upperC); 784b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 785f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} 786b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 787f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) { 788f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSet contractions; 789f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius ucol_getContractionsAndExpansions(collatorPrimaryOnly_->getUCollator(), 790f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius contractions.toUSet(), NULL, FALSE, &errorCode); 791f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(errorCode)) { return FALSE; } 792f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString firstHanBoundary; 793f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UBool hasPinyin = FALSE; 794f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeSetIterator iter(contractions); 795f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius while (iter.next()) { 796f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &s = iter.getString(); 797f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (s.startsWith(BASE, BASE_LENGTH)) { 798f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->add(s); 799f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (firstHanBoundary.isEmpty() || 800f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->compare(s, firstHanBoundary, errorCode) < 0) { 801f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstHanBoundary = s; 802f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 803f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UChar c = s.charAt(s.length() - 1); 804f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (0x41 <= c && c <= 0x5A) { // A-Z 805f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius hasPinyin = TRUE; 806f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 807f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 808b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 809f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hasPinyin) { 810f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initialLabels_->add(0x41, 0x5A); // A-Z 811f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 812f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (!firstHanBoundary.isEmpty()) { 813f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The hardcoded list of script boundaries includes U+4E00 814f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // which is tailored to not be the first primary 815f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // in all Chinese tailorings except "unihan". 816f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Replace U+4E00 with the first boundary string from the tailoring. 817f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // TODO: This becomes obsolete when the root collator gets 818f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // reliable script-first-primary mappings. 819f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t hanIndex = binarySearch( 820f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *firstCharsInScripts_, UnicodeString((UChar)0x4E00), *collatorPrimaryOnly_); 821f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (hanIndex >= 0) { 822f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString *fh = new UnicodeString(firstHanBoundary); 823f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (fh == NULL) { 824f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 825f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 826b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 827f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->setElementAt(fh, hanIndex); 828b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 829f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return TRUE; 830f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 831f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 832b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 833b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 834b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 835b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 836b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 837b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Return the string with interspersed CGJs. Input must have more than 2 codepoints. 838b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 839f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar CGJ = 0x034F; 840b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUnicodeString AlphabeticIndex::separated(const UnicodeString &item) { 841b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString result; 842b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (item.length() == 0) { 843b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 844b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 845b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i = 0; 846b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for (;;) { 847b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar32 cp = item.char32At(i); 848b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.append(cp); 849b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho i = item.moveIndex32(i, 1); 850b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i >= item.length()) { 851b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 852b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 853b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.append(CGJ); 854b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 855b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 856b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 857b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 858b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 859b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const { 860b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 861b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 862b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 863b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 864b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const { 865b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 866b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 867b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 868b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 869b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst RuleBasedCollator &AlphabeticIndex::getCollator() const { 870b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // There are no known non-RuleBasedCollator collators, and none ever expected. 871b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // But, in case that changes, better a null pointer than a wrong type. 872b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *dynamic_cast<RuleBasedCollator *>(collator_); 873b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 874b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 875b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 876b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getInflowLabel() const { 877b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return inflowLabel_; 878b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 879b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 880b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getOverflowLabel() const { 881b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return overflowLabel_; 882b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 883b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 884b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 885b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getUnderflowLabel() const { 886b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return underflowLabel_; 887b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 888b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 889b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 890b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 891b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inflowLabel_ = label; 892f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 893b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 894b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 895b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 896b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 897b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 898b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho overflowLabel_ = label; 899f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 900b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 901b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 902b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 903b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 904b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) { 905b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho underflowLabel_ = label; 906f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 907b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 908b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 909b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 910b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 911b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getMaxLabelCount() const { 912b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return maxLabelCount_; 913b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 914b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 915b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 916b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) { 917b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 918b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 919b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 920b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (maxLabelCount <= 0) { 921b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ILLEGAL_ARGUMENT_ERROR; 922b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 923b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 924b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho maxLabelCount_ = maxLabelCount; 925f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 926b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 927b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 928b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 929b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 930b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 931b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// init() - Common code for constructors. 932b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 933b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 934f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusvoid AlphabeticIndex::init(const Locale *locale, UErrorCode &status) { 935f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 936f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (locale == NULL && collator_ == NULL) { 937f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 938b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 939b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 940f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 941b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho initialLabels_ = new UnicodeSet(); 942f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (initialLabels_ == NULL) { 943f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 944f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 945f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 946b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 947f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inflowLabel_.setTo((UChar)0x2026); // Ellipsis 948b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho overflowLabel_ = inflowLabel_; 949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho underflowLabel_ = inflowLabel_; 950b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 951f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collator_ == NULL) { 952f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collator_ = static_cast<RuleBasedCollator *>(Collator::createInstance(*locale, status)); 953f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 954f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collator_ == NULL) { 955f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 956f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 957f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 958f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 959f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone()); 960f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_ == NULL) { 961f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 962b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 963b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 964f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status); 965f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_ = firstStringsInScript(status); 966f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 967f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status); 9680a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius 9690a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius // Add index exemplar characters before checking the script boundaries, 9700a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius // since this might modify them. 9710a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius addIndexExemplars(locale, status); 9720a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius 973f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString _4E00((UChar)0x4E00); 9740a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius int32_t hanIndex = binarySearch( 9750a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius *firstCharsInScripts_, _4E00, *collatorPrimaryOnly_); 9760a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius if (hanIndex >= 0) { 9770a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius // Adjust the Han script boundary if necessary. 978f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // TODO: This becomes obsolete when the root collator gets 979f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // reliable script-first-primary mappings. 9800a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius UnicodeString _1100((UChar)0x1100); 9810a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius UnicodeString _1112((UChar)0x1112); 9820a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius UnicodeString _4E9C((UChar)0x4E9C); 9830a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius if (collatorPrimaryOnly_->compare(_4E00, _1112, status) <= 0 && 9840a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius collatorPrimaryOnly_->compare(_1100, _4E00, status) <= 0) { 9850a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius // The standard Korean tailoring sorts Hanja (Han characters) 9860a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius // as secondary differences from Hangul syllables. 9870a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius // This makes U+4E00 not useful as a Han-script boundary. 988f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->removeElementAt(hanIndex); 9890a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius } else if (collatorPrimaryOnly_->compare(_4E9C, _4E00, status) < 0) { 9900a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius // The standard Japanese tailoring sorts U+4E9C first among Kanji. 9910a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius UnicodeString *fh = new UnicodeString(_4E9C); 9920a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius if (fh == NULL) { 9930a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 9940a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius return; 9950a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius } 9960a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius firstCharsInScripts_->setElementAt(fh, hanIndex); 997b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 998f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 9990a61a367aa48577edf1c9ba57c501b8f5e7555d5Craig Cornelius 1000f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Guard against a degenerate collator where 1001f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // some script boundary strings are primary ignorable. 1002f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius for (;;) { 1003f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return; } 1004f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (firstCharsInScripts_->isEmpty()) { 1005f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // AlphabeticIndex requires some non-ignorable script boundary strings. 1006f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 1007f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return; 1008b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1009f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (collatorPrimaryOnly_->compare( 1010f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)), 1011f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius emptyString_, status) == UCOL_EQUAL) { 1012f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius firstCharsInScripts_->removeElementAt(0); 1013f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } else { 1014f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius break; 1015b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1016b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1017b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1018b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1019b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1020b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1021b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Comparison function for UVector<UnicodeString *> sorting with a collator. 1022b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1023b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 1024f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliuscollatorComparator(const void *context, const void *left, const void *right) { 1025103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *leftElement = static_cast<const UElement *>(left); 1026103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *rightElement = static_cast<const UElement *>(right); 1027103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer); 1028103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer); 1029b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1030b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (leftString == rightString) { 1031b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Catches case where both are NULL 1032b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1033b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1034b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (leftString == NULL) { 1035b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 1; 1036b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 1037b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (rightString == NULL) { 1038b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return -1; 1039b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1040f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const Collator *col = static_cast<const Collator *>(context); 1041f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 1042f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return col->compare(*leftString, *rightString, errorCode); 1043b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1044b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1045b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1046b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Comparison function for UVector<Record *> sorting with a collator. 1047b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1048b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 1049b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehorecordCompareFn(const void *context, const void *left, const void *right) { 1050103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *leftElement = static_cast<const UElement *>(left); 1051103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UElement *rightElement = static_cast<const UElement *>(right); 1052103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer); 1053103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer); 1054b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const Collator *col = static_cast<const Collator *>(context); 1055f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode errorCode = U_ZERO_ERROR; 1056f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return col->compare(leftRec->name_, rightRec->name_, errorCode); 1057b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1058b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1059b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1060f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius/** 1061f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * This list contains one character per script that has the 1062f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * lowest primary weight for that script in the root collator. 1063f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * This list will be copied and sorted to account for script reordering. 1064f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 1065f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * <p>TODO: This is fragile. If the first character of a script is tailored 1066f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * so that it does not map to the script's lowest primary weight any more, 1067f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * then the buckets will be off. 1068f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * There are hacks in the code to handle the known CJK tailorings of U+4E00. 1069f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 1070f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * <p>We use "A" not "a" because the en_US_POSIX tailoring sorts A primary-before a. 1071f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * 1072f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Keep this in sync with HACK_FIRST_CHARS_IN_SCRIPTS in 1073f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * ICU4J main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java 1074f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius */ 1075f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusstatic const UChar HACK_FIRST_CHARS_IN_SCRIPTS[] = { 1076f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x41, 0, 0x03B1, 0, 1077f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x2C81, 0, 0x0430, 0, 0x2C30, 0, 0x10D0, 0, 0x0561, 0, 0x05D0, 0, 0xD802, 0xDD00, 0, 0x0800, 0, 0x0621, 0, 0x0710, 0, 1078f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x0780, 0, 0x07CA, 0, 0x2D30, 0, 0x1200, 0, 0x0950, 0, 0x0985, 0, 0x0A74, 0, 0x0AD0, 0, 0x0B05, 0, 0x0BD0, 0, 1079f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x0C05, 0, 0x0C85, 0, 0x0D05, 0, 0x0D85, 0, 1080f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xAAF2, 0, // Meetei Mayek 1081f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xA800, 0, 0xA882, 0, 0xD804, 0xDC83, 0, 1082f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x111C4), U16_TRAIL(0x111C4), 0, // Sharada 1083f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x11680), U16_TRAIL(0x11680), 0, // Takri 1084f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x1B83, 0, 1085f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD802, 0xDE00, 0, 0x0E01, 0, 1086f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x0EDE, 0, // Lao 1087f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xAA80, 0, 0x0F40, 0, 0x1C00, 0, 0xA840, 0, 0x1900, 0, 0x1700, 0, 0x1720, 0, 1088f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x1740, 0, 0x1760, 0, 0x1A00, 0, 0xA930, 0, 0xA90A, 0, 0x1000, 0, 1089f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x11103), U16_TRAIL(0x11103), 0, // Chakma 1090f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x1780, 0, 0x1950, 0, 0x1980, 0, 0x1A20, 0, 1091f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xAA00, 0, 0x1B05, 0, 0xA984, 0, 0x1880, 0, 0x1C5A, 0, 0x13A0, 0, 0x1401, 0, 0x1681, 0, 0x16A0, 0, 0xD803, 0xDC00, 0, 1092f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xA500, 0, 0xA6A0, 0, 0x1100, 0, 0x3041, 0, 0x30A1, 0, 0x3105, 0, 0xA000, 0, 0xA4F8, 0, 1093f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x16F00), U16_TRAIL(0x16F00), 0, // Miao 1094f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD800, 0xDE80, 0, 1095f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD800, 0xDEA0, 0, 0xD802, 0xDD20, 0, 0xD800, 0xDF00, 0, 0xD800, 0xDF30, 0, 0xD801, 0xDC28, 0, 0xD801, 0xDC50, 0, 1096f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD801, 0xDC80, 0, 1097f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x110D0), U16_TRAIL(0x110D0), 0, // Sora Sompeng 1098f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD800, 0xDC00, 0, 0xD802, 0xDC00, 0, 0xD802, 0xDE60, 0, 0xD802, 0xDF00, 0, 0xD802, 0xDC40, 0, 1099f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xD802, 0xDF40, 0, 0xD802, 0xDF60, 0, 0xD800, 0xDF80, 0, 0xD800, 0xDFA0, 0, 0xD808, 0xDC00, 0, 0xD80C, 0xDC00, 0, 1100f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x109A0), U16_TRAIL(0x109A0), 0, // Meroitic Cursive 1101f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius U16_LEAD(0x10980), U16_TRAIL(0x10980), 0, // Meroitic Hieroglyphs 1102f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0x4E00, 0, 1103f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // TODO: The overflow bucket's lowerBoundary string should be the 1104f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // first item after the last reordering group in the collator's script order. 1105f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This should normally be the first Unicode code point 1106f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // that is unassigned (U+0378 in Unicode 6.3) and untailored. 1107f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // However, at least up to ICU 51 the Hani reordering group includes 1108f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // unassigned code points, 1109f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and there is no stable string for the start of the trailing-weights range. 1110f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // The only known string that sorts "high" is U+FFFF. 1111f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // When ICU separates Hani vs. unassigned reordering groups, we need to fix this, 1112f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // and fix relevant test code. 1113f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // Ideally, FractionalUCA.txt will have a "script first primary" 1114f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // for unassigned code points. 1115f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 0xFFFF, 0 1116f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius}; 1117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { 1119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 1121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UVector *dest = new UVector(status); 1123103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (dest == NULL) { 1124f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 1126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1127103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius dest->setDeleter(uprv_deleteUObject); 1128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UChar *src = HACK_FIRST_CHARS_IN_SCRIPTS; 1129f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UChar *limit = src + LENGTHOF(HACK_FIRST_CHARS_IN_SCRIPTS); 1130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 1131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return dest; 1133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeString *str = new UnicodeString(src, -1); 1135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (str == NULL) { 1136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_MEMORY_ALLOCATION_ERROR; 1137f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return dest; 1138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1139f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius dest->addElement(str, status); 1140f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius src += str->length() + 1; 1141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while (src < limit); 1142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return dest; 1143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1146f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Corneliusnamespace { 1147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 1149f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Returns true if one index character string is "better" than the other. 1150f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * Shorter NFKD is better, and otherwise NFKD-binary-less-than is 1151f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius * better, and otherwise binary-less-than is better. 1152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 1153f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusUBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer, 1154f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString &one, const UnicodeString &other) { 1155f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius // This is called with primary-equal strings, but never with one.equals(other). 1156f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UErrorCode status = U_ZERO_ERROR; 1157f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString n1 = nfkdNormalizer.normalize(one, status); 1158f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UnicodeString n2 = nfkdNormalizer.normalize(other, status); 1159f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_FAILURE(status)) { return FALSE; } 1160f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius int32_t result = n1.countChar32() - n2.countChar32(); 1161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (result != 0) { 1162f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return result < 0; 1163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result = n1.compareCodePointOrder(n2); 1165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (result != 0) { 1166f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return result < 0; 1167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1168f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return one.compareCodePointOrder(other) < 0; 1169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1171f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius} // namespace 1172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Constructor & Destructor for AlphabeticIndex::Record 1175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// 1176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Records are internal only, instances are not directly surfaced in the public API. 1177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// This class is mostly struct-like, with all public fields. 1178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1179f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig CorneliusAlphabeticIndex::Record::Record(const UnicodeString &name, const void *data) 1180f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : name_(name), data_(data) {} 1181f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius 1182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Record::~Record() { 1183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) { 1187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1190f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (inputList_ == NULL) { 1191f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_ = new UVector(status); 1192f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (inputList_ == NULL) { 1193f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1194f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return *this; 1195f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1196f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->setDeleter(alphaIndex_deleteRecord); 1197f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1198f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius Record *r = new Record(name, data); 1199f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (r == NULL) { 1200f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 1201f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return *this; 1202f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1203f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->addElement(r, status); 1204f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 1205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::string ss; 1206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::string ss2; 1207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << 1208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl; 1209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) { 1214f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) { 1215f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius inputList_->removeAllElements(); 1216f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius clearBuckets(); 1217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) { 1222f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 1223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1226f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status); 1227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketIndex() const { 1231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return labelsIterIndex_; 1232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextBucket(UErrorCode &status) { 1236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1239f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ == NULL && currentBucket_ != NULL) { 1240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ENUM_OUT_OF_SYNC_ERROR; 1241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1243f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius initBuckets(status); 1244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++labelsIterIndex_; 1248f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (labelsIterIndex_ >= buckets_->getBucketCount()) { 1249f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius labelsIterIndex_ = buckets_->getBucketCount(); 1250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1252f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_); 1253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho resetRecordIterator(); 1254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 1255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getBucketLabel() const { 1258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ != NULL) { 1259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->label_; 1260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1261f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return emptyString_; 1262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const { 1267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ != NULL) { 1268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->labelType_; 1269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return U_ALPHAINDEX_NORMAL; 1271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t AlphabeticIndex::getBucketRecordCount() const { 1276f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL) { 1277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return currentBucket_->records_->size(); 1278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 1279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 1280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) { 1285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1288f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius internalResetBucketIterator(); 1289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool AlphabeticIndex::nextRecord(UErrorCode &status) { 1294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (U_FAILURE(status)) { 1295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (currentBucket_ == NULL) { 1298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // We are trying to iterate over the items in a bucket, but there is no 1299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // current bucket from the enumeration of buckets. 1300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_INVALID_STATE_ERROR; 1301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1303f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (buckets_ == NULL) { 1304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho status = U_ENUM_OUT_OF_SYNC_ERROR; 1305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1307f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_->records_ == NULL) { 1308f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius return FALSE; 1309f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius } 1310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++itemsIterIndex_; 1311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (itemsIterIndex_ >= currentBucket_->records_->size()) { 1312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ = currentBucket_->records_->size(); 1313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 1314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 1316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UnicodeString &AlphabeticIndex::getRecordName() const { 1320f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius const UnicodeString *retStr = &emptyString_; 1321f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL && 1322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ >= 0 && 1323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ < currentBucket_->records_->size()) { 1324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); 1325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retStr = &item->name_; 1326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *retStr; 1328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst void *AlphabeticIndex::getRecordData() const { 1331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const void *retPtr = NULL; 1332f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius if (currentBucket_ != NULL && currentBucket_->records_ != NULL && 1333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ >= 0 && 1334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ < currentBucket_->records_->size()) { 1335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_)); 1336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho retPtr = item->data_; 1337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 1338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return retPtr; 1339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex & AlphabeticIndex::resetRecordIterator() { 1343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho itemsIterIndex_ = -1; 1344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 1345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::Bucket(const UnicodeString &label, 1350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UnicodeString &lowerBoundary, 1351f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius UAlphabeticIndexLabelType type) 1352f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius : label_(label), lowerBoundary_(lowerBoundary), labelType_(type), 1353f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius displayBucket_(NULL), displayIndex_(-1), 1354f760e5e9e080f32b3afdfaea0b961ce09eb052f4Craig Cornelius records_(NULL) { 1355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoAlphabeticIndex::Bucket::~Bucket() { 1359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete records_; 1360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 1361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 1363103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 1364103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif 1365