1/* 2********************************************************************** 3* Copyright (C) 2001-2014 IBM and others. All rights reserved. 4********************************************************************** 5* Date Name Description 6* 03/22/2000 helena Creation. 7********************************************************************** 8*/ 9 10#include "unicode/utypes.h" 11 12#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 13 14#include "unicode/stsearch.h" 15#include "usrchimp.h" 16#include "cmemory.h" 17 18U_NAMESPACE_BEGIN 19 20UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) 21 22// public constructors and destructors ----------------------------------- 23 24StringSearch::StringSearch(const UnicodeString &pattern, 25 const UnicodeString &text, 26 const Locale &locale, 27 BreakIterator *breakiter, 28 UErrorCode &status) : 29 SearchIterator(text, breakiter), 30 m_pattern_(pattern) 31{ 32 if (U_FAILURE(status)) { 33 m_strsrch_ = NULL; 34 return; 35 } 36 37 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 38 m_text_.getBuffer(), m_text_.length(), 39 locale.getName(), (UBreakIterator *)breakiter, 40 &status); 41 uprv_free(m_search_); 42 m_search_ = NULL; 43 44 if (U_SUCCESS(status)) { 45 // m_search_ has been created by the base SearchIterator class 46 m_search_ = m_strsrch_->search; 47 } 48} 49 50StringSearch::StringSearch(const UnicodeString &pattern, 51 const UnicodeString &text, 52 RuleBasedCollator *coll, 53 BreakIterator *breakiter, 54 UErrorCode &status) : 55 SearchIterator(text, breakiter), 56 m_pattern_(pattern) 57{ 58 if (U_FAILURE(status)) { 59 m_strsrch_ = NULL; 60 return; 61 } 62 if (coll == NULL) { 63 status = U_ILLEGAL_ARGUMENT_ERROR; 64 m_strsrch_ = NULL; 65 return; 66 } 67 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 68 m_pattern_.length(), 69 m_text_.getBuffer(), 70 m_text_.length(), coll->toUCollator(), 71 (UBreakIterator *)breakiter, 72 &status); 73 uprv_free(m_search_); 74 m_search_ = NULL; 75 76 if (U_SUCCESS(status)) { 77 // m_search_ has been created by the base SearchIterator class 78 m_search_ = m_strsrch_->search; 79 } 80} 81 82StringSearch::StringSearch(const UnicodeString &pattern, 83 CharacterIterator &text, 84 const Locale &locale, 85 BreakIterator *breakiter, 86 UErrorCode &status) : 87 SearchIterator(text, breakiter), 88 m_pattern_(pattern) 89{ 90 if (U_FAILURE(status)) { 91 m_strsrch_ = NULL; 92 return; 93 } 94 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 95 m_text_.getBuffer(), m_text_.length(), 96 locale.getName(), (UBreakIterator *)breakiter, 97 &status); 98 uprv_free(m_search_); 99 m_search_ = NULL; 100 101 if (U_SUCCESS(status)) { 102 // m_search_ has been created by the base SearchIterator class 103 m_search_ = m_strsrch_->search; 104 } 105} 106 107StringSearch::StringSearch(const UnicodeString &pattern, 108 CharacterIterator &text, 109 RuleBasedCollator *coll, 110 BreakIterator *breakiter, 111 UErrorCode &status) : 112 SearchIterator(text, breakiter), 113 m_pattern_(pattern) 114{ 115 if (U_FAILURE(status)) { 116 m_strsrch_ = NULL; 117 return; 118 } 119 if (coll == NULL) { 120 status = U_ILLEGAL_ARGUMENT_ERROR; 121 m_strsrch_ = NULL; 122 return; 123 } 124 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 125 m_pattern_.length(), 126 m_text_.getBuffer(), 127 m_text_.length(), coll->toUCollator(), 128 (UBreakIterator *)breakiter, 129 &status); 130 uprv_free(m_search_); 131 m_search_ = NULL; 132 133 if (U_SUCCESS(status)) { 134 // m_search_ has been created by the base SearchIterator class 135 m_search_ = m_strsrch_->search; 136 } 137} 138 139StringSearch::StringSearch(const StringSearch &that) : 140 SearchIterator(that.m_text_, that.m_breakiterator_), 141 m_pattern_(that.m_pattern_) 142{ 143 UErrorCode status = U_ZERO_ERROR; 144 145 // Free m_search_ from the superclass 146 uprv_free(m_search_); 147 m_search_ = NULL; 148 149 if (that.m_strsrch_ == NULL) { 150 // This was not a good copy 151 m_strsrch_ = NULL; 152 } 153 else { 154 // Make a deep copy 155 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 156 m_pattern_.length(), 157 m_text_.getBuffer(), 158 m_text_.length(), 159 that.m_strsrch_->collator, 160 (UBreakIterator *)that.m_breakiterator_, 161 &status); 162 if (U_SUCCESS(status)) { 163 // m_search_ has been created by the base SearchIterator class 164 m_search_ = m_strsrch_->search; 165 } 166 } 167} 168 169StringSearch::~StringSearch() 170{ 171 if (m_strsrch_ != NULL) { 172 usearch_close(m_strsrch_); 173 m_search_ = NULL; 174 } 175} 176 177StringSearch * 178StringSearch::clone() const { 179 return new StringSearch(*this); 180} 181 182// operator overloading --------------------------------------------- 183StringSearch & StringSearch::operator=(const StringSearch &that) 184{ 185 if ((*this) != that) { 186 UErrorCode status = U_ZERO_ERROR; 187 m_text_ = that.m_text_; 188 m_breakiterator_ = that.m_breakiterator_; 189 m_pattern_ = that.m_pattern_; 190 // all m_search_ in the parent class is linked up with m_strsrch_ 191 usearch_close(m_strsrch_); 192 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 193 m_pattern_.length(), 194 m_text_.getBuffer(), 195 m_text_.length(), 196 that.m_strsrch_->collator, 197 NULL, &status); 198 // Check null pointer 199 if (m_strsrch_ != NULL) { 200 m_search_ = m_strsrch_->search; 201 } 202 } 203 return *this; 204} 205 206UBool StringSearch::operator==(const SearchIterator &that) const 207{ 208 if (this == &that) { 209 return TRUE; 210 } 211 if (SearchIterator::operator ==(that)) { 212 StringSearch &thatsrch = (StringSearch &)that; 213 return (this->m_pattern_ == thatsrch.m_pattern_ && 214 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); 215 } 216 return FALSE; 217} 218 219// public get and set methods ---------------------------------------- 220 221void StringSearch::setOffset(int32_t position, UErrorCode &status) 222{ 223 // status checked in usearch_setOffset 224 usearch_setOffset(m_strsrch_, position, &status); 225} 226 227int32_t StringSearch::getOffset(void) const 228{ 229 return usearch_getOffset(m_strsrch_); 230} 231 232void StringSearch::setText(const UnicodeString &text, UErrorCode &status) 233{ 234 if (U_SUCCESS(status)) { 235 m_text_ = text; 236 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); 237 } 238} 239 240void StringSearch::setText(CharacterIterator &text, UErrorCode &status) 241{ 242 if (U_SUCCESS(status)) { 243 text.getText(m_text_); 244 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); 245 } 246} 247 248RuleBasedCollator * StringSearch::getCollator() const 249{ 250 // Note the const_cast. It would be cleaner if this const method returned a const collator. 251 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator)); 252} 253 254void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) 255{ 256 if (U_SUCCESS(status)) { 257 usearch_setCollator(m_strsrch_, coll->toUCollator(), &status); 258 } 259} 260 261void StringSearch::setPattern(const UnicodeString &pattern, 262 UErrorCode &status) 263{ 264 if (U_SUCCESS(status)) { 265 m_pattern_ = pattern; 266 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), 267 &status); 268 } 269} 270 271const UnicodeString & StringSearch::getPattern() const 272{ 273 return m_pattern_; 274} 275 276// public methods ---------------------------------------------------- 277 278void StringSearch::reset() 279{ 280 usearch_reset(m_strsrch_); 281} 282 283SearchIterator * StringSearch::safeClone(void) const 284{ 285 UErrorCode status = U_ZERO_ERROR; 286 StringSearch *result = new StringSearch(m_pattern_, m_text_, 287 getCollator(), 288 m_breakiterator_, 289 status); 290 /* test for NULL */ 291 if (result == 0) { 292 status = U_MEMORY_ALLOCATION_ERROR; 293 return 0; 294 } 295 result->setOffset(getOffset(), status); 296 result->setMatchStart(m_strsrch_->search->matchedIndex); 297 result->setMatchLength(m_strsrch_->search->matchedLength); 298 if (U_FAILURE(status)) { 299 return NULL; 300 } 301 return result; 302} 303 304// protected method ------------------------------------------------- 305 306int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) 307{ 308 // values passed here are already in the pre-shift position 309 if (U_SUCCESS(status)) { 310 if (m_strsrch_->pattern.CELength == 0) { 311 m_search_->matchedIndex = 312 m_search_->matchedIndex == USEARCH_DONE ? 313 getOffset() : m_search_->matchedIndex + 1; 314 m_search_->matchedLength = 0; 315 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 316 &status); 317 if (m_search_->matchedIndex == m_search_->textLength) { 318 m_search_->matchedIndex = USEARCH_DONE; 319 } 320 } 321 else { 322 // looking at usearch.cpp, this part is shifted out to 323 // StringSearch instead of SearchIterator because m_strsrch_ is 324 // not accessible in SearchIterator 325#if 0 326 if (position + m_strsrch_->pattern.defaultShiftSize 327 > m_search_->textLength) { 328 setMatchNotFound(); 329 return USEARCH_DONE; 330 } 331#endif 332 if (m_search_->matchedLength <= 0) { 333 // the flipping direction issue has already been handled 334 // in next() 335 // for boundary check purposes. this will ensure that the 336 // next match will not preceed the current offset 337 // note search->matchedIndex will always be set to something 338 // in the code 339 m_search_->matchedIndex = position - 1; 340 } 341 342 ucol_setOffset(m_strsrch_->textIter, position, &status); 343 344#if 0 345 for (;;) { 346 if (m_search_->isCanonicalMatch) { 347 // can't use exact here since extra accents are allowed. 348 usearch_handleNextCanonical(m_strsrch_, &status); 349 } 350 else { 351 usearch_handleNextExact(m_strsrch_, &status); 352 } 353 if (U_FAILURE(status)) { 354 return USEARCH_DONE; 355 } 356 if (m_breakiterator_ == NULL 357#if !UCONFIG_NO_BREAK_ITERATION 358 || 359 m_search_->matchedIndex == USEARCH_DONE || 360 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 361 m_breakiterator_->isBoundary(m_search_->matchedIndex + 362 m_search_->matchedLength)) 363#endif 364 ) { 365 if (m_search_->matchedIndex == USEARCH_DONE) { 366 ucol_setOffset(m_strsrch_->textIter, 367 m_search_->textLength, &status); 368 } 369 else { 370 ucol_setOffset(m_strsrch_->textIter, 371 m_search_->matchedIndex, &status); 372 } 373 return m_search_->matchedIndex; 374 } 375 } 376#else 377 // if m_strsrch_->breakIter is always the same as m_breakiterator_ 378 // then we don't need to check the match boundaries here because 379 // usearch_handleNextXXX will already have done it. 380 if (m_search_->isCanonicalMatch) { 381 // *could* actually use exact here 'cause no extra accents allowed... 382 usearch_handleNextCanonical(m_strsrch_, &status); 383 } else { 384 usearch_handleNextExact(m_strsrch_, &status); 385 } 386 387 if (U_FAILURE(status)) { 388 return USEARCH_DONE; 389 } 390 391 if (m_search_->matchedIndex == USEARCH_DONE) { 392 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); 393 } else { 394 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); 395 } 396 397 return m_search_->matchedIndex; 398#endif 399 } 400 } 401 return USEARCH_DONE; 402} 403 404int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) 405{ 406 // values passed here are already in the pre-shift position 407 if (U_SUCCESS(status)) { 408 if (m_strsrch_->pattern.CELength == 0) { 409 m_search_->matchedIndex = 410 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : 411 m_search_->matchedIndex); 412 if (m_search_->matchedIndex == 0) { 413 setMatchNotFound(); 414 } 415 else { 416 m_search_->matchedIndex --; 417 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 418 &status); 419 m_search_->matchedLength = 0; 420 } 421 } 422 else { 423 // looking at usearch.cpp, this part is shifted out to 424 // StringSearch instead of SearchIterator because m_strsrch_ is 425 // not accessible in SearchIterator 426#if 0 427 if (!m_search_->isOverlap && 428 position - m_strsrch_->pattern.defaultShiftSize < 0) { 429 setMatchNotFound(); 430 return USEARCH_DONE; 431 } 432 433 for (;;) { 434 if (m_search_->isCanonicalMatch) { 435 // can't use exact here since extra accents are allowed. 436 usearch_handlePreviousCanonical(m_strsrch_, &status); 437 } 438 else { 439 usearch_handlePreviousExact(m_strsrch_, &status); 440 } 441 if (U_FAILURE(status)) { 442 return USEARCH_DONE; 443 } 444 if (m_breakiterator_ == NULL 445#if !UCONFIG_NO_BREAK_ITERATION 446 || 447 m_search_->matchedIndex == USEARCH_DONE || 448 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 449 m_breakiterator_->isBoundary(m_search_->matchedIndex + 450 m_search_->matchedLength)) 451#endif 452 ) { 453 return m_search_->matchedIndex; 454 } 455 } 456#else 457 ucol_setOffset(m_strsrch_->textIter, position, &status); 458 459 if (m_search_->isCanonicalMatch) { 460 // *could* use exact match here since extra accents *not* allowed! 461 usearch_handlePreviousCanonical(m_strsrch_, &status); 462 } else { 463 usearch_handlePreviousExact(m_strsrch_, &status); 464 } 465 466 if (U_FAILURE(status)) { 467 return USEARCH_DONE; 468 } 469 470 return m_search_->matchedIndex; 471#endif 472 } 473 474 return m_search_->matchedIndex; 475 } 476 return USEARCH_DONE; 477} 478 479U_NAMESPACE_END 480 481#endif /* #if !UCONFIG_NO_COLLATION */ 482