1/* 2******************************************************************************* 3* Copyright (C) 1996-2009, International Business Machines Corporation and * 4* others. All Rights Reserved. * 5******************************************************************************* 6*/ 7 8/* 9* File coleitr.cpp 10* 11* 12* 13* Created by: Helena Shih 14* 15* Modification History: 16* 17* Date Name Description 18* 19* 6/23/97 helena Adding comments to make code more readable. 20* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java 21* 12/10/99 aliu Ported Thai collation support from Java. 22* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) 23* 02/19/01 swquek Removed CollationElementsIterator() since it is 24* private constructor and no calls are made to it 25*/ 26 27#include "unicode/utypes.h" 28 29#if !UCONFIG_NO_COLLATION 30 31#include "unicode/coleitr.h" 32#include "unicode/ustring.h" 33#include "ucol_imp.h" 34#include "cmemory.h" 35 36 37/* Constants --------------------------------------------------------------- */ 38 39U_NAMESPACE_BEGIN 40 41UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) 42 43/* CollationElementIterator public constructor/destructor ------------------ */ 44 45CollationElementIterator::CollationElementIterator( 46 const CollationElementIterator& other) 47 : UObject(other), isDataOwned_(TRUE) 48{ 49 UErrorCode status = U_ZERO_ERROR; 50 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, 51 &status); 52 53 *this = other; 54} 55 56CollationElementIterator::~CollationElementIterator() 57{ 58 if (isDataOwned_) { 59 ucol_closeElements(m_data_); 60 } 61} 62 63/* CollationElementIterator public methods --------------------------------- */ 64 65int32_t CollationElementIterator::getOffset() const 66{ 67 return ucol_getOffset(m_data_); 68} 69 70/** 71* Get the ordering priority of the next character in the string. 72* @return the next character's ordering. Returns NULLORDER if an error has 73* occured or if the end of string has been reached 74*/ 75int32_t CollationElementIterator::next(UErrorCode& status) 76{ 77 return ucol_next(m_data_, &status); 78} 79 80UBool CollationElementIterator::operator!=( 81 const CollationElementIterator& other) const 82{ 83 return !(*this == other); 84} 85 86UBool CollationElementIterator::operator==( 87 const CollationElementIterator& that) const 88{ 89 if (this == &that || m_data_ == that.m_data_) { 90 return TRUE; 91 } 92 93 // option comparison 94 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) 95 { 96 return FALSE; 97 } 98 99 // the constructor and setText always sets a length 100 // and we only compare the string not the contents of the normalization 101 // buffer 102 int thislength = m_data_->iteratordata_.endp - 103 m_data_->iteratordata_.string; 104 int thatlength = that.m_data_->iteratordata_.endp - 105 that.m_data_->iteratordata_.string; 106 107 if (thislength != thatlength) { 108 return FALSE; 109 } 110 111 if (uprv_memcmp(m_data_->iteratordata_.string, 112 that.m_data_->iteratordata_.string, 113 thislength * U_SIZEOF_UCHAR) != 0) { 114 return FALSE; 115 } 116 if (getOffset() != that.getOffset()) { 117 return FALSE; 118 } 119 120 // checking normalization buffer 121 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { 122 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) { 123 return FALSE; 124 } 125 // both are in the normalization buffer 126 if (m_data_->iteratordata_.pos 127 - m_data_->iteratordata_.writableBuffer 128 != that.m_data_->iteratordata_.pos 129 - that.m_data_->iteratordata_.writableBuffer) { 130 // not in the same position in the normalization buffer 131 return FALSE; 132 } 133 } 134 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { 135 return FALSE; 136 } 137 // checking ce position 138 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs) 139 == (that.m_data_->iteratordata_.CEpos 140 - that.m_data_->iteratordata_.CEs); 141} 142 143/** 144* Get the ordering priority of the previous collation element in the string. 145* @param status the error code status. 146* @return the previous element's ordering. Returns NULLORDER if an error has 147* occured or if the start of string has been reached. 148*/ 149int32_t CollationElementIterator::previous(UErrorCode& status) 150{ 151 return ucol_previous(m_data_, &status); 152} 153 154/** 155* Resets the cursor to the beginning of the string. 156*/ 157void CollationElementIterator::reset() 158{ 159 ucol_reset(m_data_); 160} 161 162void CollationElementIterator::setOffset(int32_t newOffset, 163 UErrorCode& status) 164{ 165 ucol_setOffset(m_data_, newOffset, &status); 166} 167 168/** 169* Sets the source to the new source string. 170*/ 171void CollationElementIterator::setText(const UnicodeString& source, 172 UErrorCode& status) 173{ 174 if (U_FAILURE(status)) { 175 return; 176 } 177 178 int32_t length = source.length(); 179 UChar *string = NULL; 180 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { 181 uprv_free(m_data_->iteratordata_.string); 182 } 183 m_data_->isWritable = TRUE; 184 if (length > 0) { 185 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 186 /* test for NULL */ 187 if (string == NULL) { 188 status = U_MEMORY_ALLOCATION_ERROR; 189 return; 190 } 191 u_memcpy(string, source.getBuffer(), length); 192 } 193 else { 194 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 195 /* test for NULL */ 196 if (string == NULL) { 197 status = U_MEMORY_ALLOCATION_ERROR; 198 return; 199 } 200 *string = 0; 201 } 202 /* Free offsetBuffer before initializing it. */ 203 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); 204 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, 205 &m_data_->iteratordata_); 206 207 m_data_->reset_ = TRUE; 208} 209 210// Sets the source to the new character iterator. 211void CollationElementIterator::setText(CharacterIterator& source, 212 UErrorCode& status) 213{ 214 if (U_FAILURE(status)) 215 return; 216 217 int32_t length = source.getLength(); 218 UChar *buffer = NULL; 219 220 if (length == 0) { 221 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 222 /* test for NULL */ 223 if (buffer == NULL) { 224 status = U_MEMORY_ALLOCATION_ERROR; 225 return; 226 } 227 *buffer = 0; 228 } 229 else { 230 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 231 /* test for NULL */ 232 if (buffer == NULL) { 233 status = U_MEMORY_ALLOCATION_ERROR; 234 return; 235 } 236 /* 237 Using this constructor will prevent buffer from being removed when 238 string gets removed 239 */ 240 UnicodeString string; 241 source.getText(string); 242 u_memcpy(buffer, string.getBuffer(), length); 243 } 244 245 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { 246 uprv_free(m_data_->iteratordata_.string); 247 } 248 m_data_->isWritable = TRUE; 249 /* Free offsetBuffer before initializing it. */ 250 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); 251 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, 252 &m_data_->iteratordata_); 253 m_data_->reset_ = TRUE; 254} 255 256int32_t CollationElementIterator::strengthOrder(int32_t order) const 257{ 258 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); 259 // Mask off the unwanted differences. 260 if (s == UCOL_PRIMARY) { 261 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; 262 } 263 else if (s == UCOL_SECONDARY) { 264 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; 265 } 266 267 return order; 268} 269 270/* CollationElementIterator private constructors/destructors --------------- */ 271 272/** 273* This is the "real" constructor for this class; it constructs an iterator 274* over the source text using the specified collator 275*/ 276CollationElementIterator::CollationElementIterator( 277 const UnicodeString& sourceText, 278 const RuleBasedCollator* order, 279 UErrorCode& status) 280 : isDataOwned_(TRUE) 281{ 282 if (U_FAILURE(status)) { 283 return; 284 } 285 286 int32_t length = sourceText.length(); 287 UChar *string = NULL; 288 289 if (length > 0) { 290 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 291 /* test for NULL */ 292 if (string == NULL) { 293 status = U_MEMORY_ALLOCATION_ERROR; 294 return; 295 } 296 /* 297 Using this constructor will prevent buffer from being removed when 298 string gets removed 299 */ 300 u_memcpy(string, sourceText.getBuffer(), length); 301 } 302 else { 303 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 304 /* test for NULL */ 305 if (string == NULL) { 306 status = U_MEMORY_ALLOCATION_ERROR; 307 return; 308 } 309 *string = 0; 310 } 311 m_data_ = ucol_openElements(order->ucollator, string, length, &status); 312 313 /* Test for buffer overflows */ 314 if (U_FAILURE(status)) { 315 return; 316 } 317 m_data_->isWritable = TRUE; 318} 319 320/** 321* This is the "real" constructor for this class; it constructs an iterator over 322* the source text using the specified collator 323*/ 324CollationElementIterator::CollationElementIterator( 325 const CharacterIterator& sourceText, 326 const RuleBasedCollator* order, 327 UErrorCode& status) 328 : isDataOwned_(TRUE) 329{ 330 if (U_FAILURE(status)) 331 return; 332 333 // **** should I just drop this test? **** 334 /* 335 if ( sourceText.endIndex() != 0 ) 336 { 337 // A CollationElementIterator is really a two-layered beast. 338 // Internally it uses a Normalizer to munge the source text into a form 339 // where all "composed" Unicode characters (such as \u00FC) are split into a 340 // normal character and a combining accent character. 341 // Afterward, CollationElementIterator does its own processing to handle 342 // expanding and contracting collation sequences, ignorables, and so on. 343 344 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL 345 ? Normalizer::NO_OP : order->getDecomposition(); 346 347 text = new Normalizer(sourceText, decomp); 348 if (text == NULL) 349 status = U_MEMORY_ALLOCATION_ERROR; 350 } 351 */ 352 int32_t length = sourceText.getLength(); 353 UChar *buffer; 354 if (length > 0) { 355 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); 356 /* test for NULL */ 357 if (buffer == NULL) { 358 status = U_MEMORY_ALLOCATION_ERROR; 359 return; 360 } 361 /* 362 Using this constructor will prevent buffer from being removed when 363 string gets removed 364 */ 365 UnicodeString string(buffer, length, length); 366 ((CharacterIterator &)sourceText).getText(string); 367 const UChar *temp = string.getBuffer(); 368 u_memcpy(buffer, temp, length); 369 } 370 else { 371 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); 372 /* test for NULL */ 373 if (buffer == NULL) { 374 status = U_MEMORY_ALLOCATION_ERROR; 375 return; 376 } 377 *buffer = 0; 378 } 379 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); 380 381 /* Test for buffer overflows */ 382 if (U_FAILURE(status)) { 383 return; 384 } 385 m_data_->isWritable = TRUE; 386} 387 388/* CollationElementIterator protected methods ----------------------------- */ 389 390const CollationElementIterator& CollationElementIterator::operator=( 391 const CollationElementIterator& other) 392{ 393 if (this != &other) 394 { 395 UCollationElements *ucolelem = this->m_data_; 396 UCollationElements *otherucolelem = other.m_data_; 397 collIterate *coliter = &(ucolelem->iteratordata_); 398 collIterate *othercoliter = &(otherucolelem->iteratordata_); 399 int length = 0; 400 401 // checking only UCOL_ITER_HASLEN is not enough here as we may be in 402 // the normalization buffer 403 length = othercoliter->endp - othercoliter->string; 404 405 ucolelem->reset_ = otherucolelem->reset_; 406 ucolelem->isWritable = TRUE; 407 408 /* create a duplicate of string */ 409 if (length > 0) { 410 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); 411 if(coliter->string != NULL) { 412 uprv_memcpy(coliter->string, othercoliter->string, 413 length * U_SIZEOF_UCHAR); 414 } else { // Error: couldn't allocate memory. No copying should be done 415 length = 0; 416 } 417 } 418 else { 419 coliter->string = NULL; 420 } 421 422 /* start and end of string */ 423 coliter->endp = coliter->string + length; 424 425 /* handle writable buffer here */ 426 427 if (othercoliter->flags & UCOL_ITER_INNORMBUF) { 428 uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1; 429 if (wlength < coliter->writableBufSize) { 430 uprv_memcpy(coliter->stackWritableBuffer, 431 othercoliter->stackWritableBuffer, 432 wlength * U_SIZEOF_UCHAR); 433 } 434 else { 435 if (coliter->writableBuffer != coliter->stackWritableBuffer) { 436 uprv_free(coliter->writableBuffer); 437 } 438 coliter->writableBuffer = (UChar *)uprv_malloc( 439 wlength * U_SIZEOF_UCHAR); 440 if(coliter->writableBuffer != NULL) { 441 uprv_memcpy(coliter->writableBuffer, 442 othercoliter->writableBuffer, 443 wlength * U_SIZEOF_UCHAR); 444 coliter->writableBufSize = wlength; 445 } else { // Error: couldn't allocate memory for writableBuffer 446 coliter->writableBufSize = 0; 447 } 448 } 449 } 450 451 /* current position */ 452 if (othercoliter->pos >= othercoliter->string && 453 othercoliter->pos <= othercoliter->endp) 454 { 455 coliter->pos = coliter->string + 456 (othercoliter->pos - othercoliter->string); 457 } 458 else if (coliter->writableBuffer != NULL) { 459 coliter->pos = coliter->writableBuffer + 460 (othercoliter->pos - othercoliter->writableBuffer); 461 } 462 else { 463 // Error: couldn't allocate memory for writableBuffer 464 coliter->pos = NULL; 465 } 466 467 /* CE buffer */ 468 int32_t CEsize; 469 if (coliter->extendCEs) { 470 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE); 471 CEsize = sizeof(othercoliter->extendCEs); 472 if (CEsize > 0) { 473 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); 474 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize); 475 } 476 coliter->toReturn = coliter->extendCEs + 477 (othercoliter->toReturn - othercoliter->extendCEs); 478 coliter->CEpos = coliter->extendCEs + CEsize; 479 } else { 480 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); 481 if (CEsize > 0) { 482 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); 483 } 484 coliter->toReturn = coliter->CEs + 485 (othercoliter->toReturn - othercoliter->CEs); 486 coliter->CEpos = coliter->CEs + CEsize; 487 } 488 489 if (othercoliter->fcdPosition != NULL) { 490 coliter->fcdPosition = coliter->string + 491 (othercoliter->fcdPosition 492 - othercoliter->string); 493 } 494 else { 495 coliter->fcdPosition = NULL; 496 } 497 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/; 498 coliter->origFlags = othercoliter->origFlags; 499 coliter->coll = othercoliter->coll; 500 this->isDataOwned_ = TRUE; 501 } 502 503 return *this; 504} 505 506U_NAMESPACE_END 507 508#endif /* #if !UCONFIG_NO_COLLATION */ 509 510/* eof */ 511