1/* 2********************************************************************** 3* Copyright (C) 2001-2008 IBM and others. All rights reserved. 4********************************************************************** 5* Date Name Description 6* 03/22/2000 helena Creation. 7********************************************************************** 8*/ 9 10#include "unicode/utypes.h" 11 12#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 13 14#include "unicode/brkiter.h" 15#include "unicode/schriter.h" 16#include "unicode/search.h" 17#include "usrchimp.h" 18#include "cmemory.h" 19 20// public constructors and destructors ----------------------------------- 21U_NAMESPACE_BEGIN 22 23SearchIterator::SearchIterator(const SearchIterator &other) 24 : UObject(other) 25{ 26 m_breakiterator_ = other.m_breakiterator_; 27 m_text_ = other.m_text_; 28 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 29 m_search_->breakIter = other.m_search_->breakIter; 30 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch; 31 m_search_->isOverlap = other.m_search_->isOverlap; 32 m_search_->matchedIndex = other.m_search_->matchedIndex; 33 m_search_->matchedLength = other.m_search_->matchedLength; 34 m_search_->text = other.m_search_->text; 35 m_search_->textLength = other.m_search_->textLength; 36} 37 38SearchIterator::~SearchIterator() 39{ 40 if (m_search_ != NULL) { 41 uprv_free(m_search_); 42 } 43} 44 45// public get and set methods ---------------------------------------- 46 47void SearchIterator::setAttribute(USearchAttribute attribute, 48 USearchAttributeValue value, 49 UErrorCode &status) 50{ 51 if (U_SUCCESS(status)) { 52 switch (attribute) 53 { 54 case USEARCH_OVERLAP : 55 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE); 56 break; 57 case USEARCH_CANONICAL_MATCH : 58 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE); 59 break; 60 default: 61 status = U_ILLEGAL_ARGUMENT_ERROR; 62 } 63 } 64 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) { 65 status = U_ILLEGAL_ARGUMENT_ERROR; 66 } 67} 68 69USearchAttributeValue SearchIterator::getAttribute( 70 USearchAttribute attribute) const 71{ 72 switch (attribute) { 73 case USEARCH_OVERLAP : 74 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF); 75 case USEARCH_CANONICAL_MATCH : 76 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 77 USEARCH_OFF); 78 default : 79 return USEARCH_DEFAULT; 80 } 81} 82 83int32_t SearchIterator::getMatchedStart() const 84{ 85 return m_search_->matchedIndex; 86} 87 88int32_t SearchIterator::getMatchedLength() const 89{ 90 return m_search_->matchedLength; 91} 92 93void SearchIterator::getMatchedText(UnicodeString &result) const 94{ 95 int32_t matchedindex = m_search_->matchedIndex; 96 int32_t matchedlength = m_search_->matchedLength; 97 if (matchedindex != USEARCH_DONE && matchedlength != 0) { 98 result.setTo(m_search_->text + matchedindex, matchedlength); 99 } 100 else { 101 result.remove(); 102 } 103} 104 105void SearchIterator::setBreakIterator(BreakIterator *breakiter, 106 UErrorCode &status) 107{ 108 if (U_SUCCESS(status)) { 109#if 0 110 m_search_->breakIter = NULL; 111 // the c++ breakiterator may not make use of ubreakiterator. 112 // so we'll have to keep track of it ourselves. 113#else 114 // Well, gee... the Constructors that take a BreakIterator 115 // all cast the BreakIterator to a UBreakIterator and 116 // pass it to the corresponding usearch_openFromXXX 117 // routine, so there's no reason not to do this. 118 // 119 // Besides, a UBreakIterator is a BreakIterator, so 120 // any subclass of BreakIterator should work fine here... 121 m_search_->breakIter = (UBreakIterator *) breakiter; 122#endif 123 124 m_breakiterator_ = breakiter; 125 } 126} 127 128const BreakIterator * SearchIterator::getBreakIterator(void) const 129{ 130 return m_breakiterator_; 131} 132 133void SearchIterator::setText(const UnicodeString &text, UErrorCode &status) 134{ 135 if (U_SUCCESS(status)) { 136 if (text.length() == 0) { 137 status = U_ILLEGAL_ARGUMENT_ERROR; 138 } 139 else { 140 m_text_ = text; 141 m_search_->text = m_text_.getBuffer(); 142 m_search_->textLength = m_text_.length(); 143 } 144 } 145} 146 147void SearchIterator::setText(CharacterIterator &text, UErrorCode &status) 148{ 149 if (U_SUCCESS(status)) { 150 text.getText(m_text_); 151 setText(m_text_, status); 152 } 153} 154 155const UnicodeString & SearchIterator::getText(void) const 156{ 157 return m_text_; 158} 159 160// operator overloading ---------------------------------------------- 161 162UBool SearchIterator::operator==(const SearchIterator &that) const 163{ 164 if (this == &that) { 165 return TRUE; 166 } 167 return (m_breakiterator_ == that.m_breakiterator_ && 168 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && 169 m_search_->isOverlap == that.m_search_->isOverlap && 170 m_search_->matchedIndex == that.m_search_->matchedIndex && 171 m_search_->matchedLength == that.m_search_->matchedLength && 172 m_search_->textLength == that.m_search_->textLength && 173 getOffset() == that.getOffset() && 174 (uprv_memcmp(m_search_->text, that.m_search_->text, 175 m_search_->textLength * sizeof(UChar)) == 0)); 176} 177 178// public methods ---------------------------------------------------- 179 180int32_t SearchIterator::first(UErrorCode &status) 181{ 182 if (U_FAILURE(status)) { 183 return USEARCH_DONE; 184 } 185 setOffset(0, status); 186 return handleNext(0, status); 187} 188 189int32_t SearchIterator::following(int32_t position, 190 UErrorCode &status) 191{ 192 if (U_FAILURE(status)) { 193 return USEARCH_DONE; 194 } 195 setOffset(position, status); 196 return handleNext(position, status); 197} 198 199int32_t SearchIterator::last(UErrorCode &status) 200{ 201 if (U_FAILURE(status)) { 202 return USEARCH_DONE; 203 } 204 setOffset(m_search_->textLength, status); 205 return handlePrev(m_search_->textLength, status); 206} 207 208int32_t SearchIterator::preceding(int32_t position, 209 UErrorCode &status) 210{ 211 if (U_FAILURE(status)) { 212 return USEARCH_DONE; 213 } 214 setOffset(position, status); 215 return handlePrev(position, status); 216} 217 218int32_t SearchIterator::next(UErrorCode &status) 219{ 220 if (U_SUCCESS(status)) { 221 int32_t offset = getOffset(); 222 int32_t matchindex = m_search_->matchedIndex; 223 int32_t matchlength = m_search_->matchedLength; 224 m_search_->reset = FALSE; 225 if (m_search_->isForwardSearching == TRUE) { 226 int32_t textlength = m_search_->textLength; 227 if (offset == textlength || matchindex == textlength || 228 (matchindex != USEARCH_DONE && 229 matchindex + matchlength >= textlength)) { 230 // not enough characters to match 231 setMatchNotFound(); 232 return USEARCH_DONE; 233 } 234 } 235 else { 236 // switching direction. 237 // if matchedIndex == USEARCH_DONE, it means that either a 238 // setOffset has been called or that previous ran off the text 239 // string. the iterator would have been set to offset 0 if a 240 // match is not found. 241 m_search_->isForwardSearching = TRUE; 242 if (m_search_->matchedIndex != USEARCH_DONE) { 243 // there's no need to set the collation element iterator 244 // the next call to next will set the offset. 245 return matchindex; 246 } 247 } 248 249 if (matchlength > 0) { 250 // if matchlength is 0 we are at the start of the iteration 251 if (m_search_->isOverlap) { 252 offset ++; 253 } 254 else { 255 offset += matchlength; 256 } 257 } 258 return handleNext(offset, status); 259 } 260 return USEARCH_DONE; 261} 262 263int32_t SearchIterator::previous(UErrorCode &status) 264{ 265 if (U_SUCCESS(status)) { 266 int32_t offset; 267 if (m_search_->reset) { 268 offset = m_search_->textLength; 269 m_search_->isForwardSearching = FALSE; 270 m_search_->reset = FALSE; 271 setOffset(offset, status); 272 } 273 else { 274 offset = getOffset(); 275 } 276 277 int32_t matchindex = m_search_->matchedIndex; 278 if (m_search_->isForwardSearching == TRUE) { 279 // switching direction. 280 // if matchedIndex == USEARCH_DONE, it means that either a 281 // setOffset has been called or that next ran off the text 282 // string. the iterator would have been set to offset textLength if 283 // a match is not found. 284 m_search_->isForwardSearching = FALSE; 285 if (matchindex != USEARCH_DONE) { 286 return matchindex; 287 } 288 } 289 else { 290 if (offset == 0 || matchindex == 0) { 291 // not enough characters to match 292 setMatchNotFound(); 293 return USEARCH_DONE; 294 } 295 } 296 297 if (matchindex != USEARCH_DONE) { 298 if (m_search_->isOverlap) { 299 matchindex += m_search_->matchedLength - 2; 300 } 301 302 return handlePrev(matchindex, status); 303 } 304 305 return handlePrev(offset, status); 306 } 307 308 return USEARCH_DONE; 309} 310 311void SearchIterator::reset() 312{ 313 UErrorCode status = U_ZERO_ERROR; 314 setMatchNotFound(); 315 setOffset(0, status); 316 m_search_->isOverlap = FALSE; 317 m_search_->isCanonicalMatch = FALSE; 318 m_search_->isForwardSearching = TRUE; 319 m_search_->reset = TRUE; 320} 321 322// protected constructors and destructors ----------------------------- 323 324SearchIterator::SearchIterator() 325{ 326 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 327 m_search_->breakIter = NULL; 328 m_search_->isOverlap = FALSE; 329 m_search_->isCanonicalMatch = FALSE; 330 m_search_->isForwardSearching = TRUE; 331 m_search_->reset = TRUE; 332 m_search_->matchedIndex = USEARCH_DONE; 333 m_search_->matchedLength = 0; 334 m_search_->text = NULL; 335 m_search_->textLength = 0; 336 m_breakiterator_ = NULL; 337} 338 339SearchIterator::SearchIterator(const UnicodeString &text, 340 BreakIterator *breakiter) : 341 m_breakiterator_(breakiter), 342 m_text_(text) 343{ 344 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 345 m_search_->breakIter = NULL; 346 m_search_->isOverlap = FALSE; 347 m_search_->isCanonicalMatch = FALSE; 348 m_search_->isForwardSearching = TRUE; 349 m_search_->reset = TRUE; 350 m_search_->matchedIndex = USEARCH_DONE; 351 m_search_->matchedLength = 0; 352 m_search_->text = m_text_.getBuffer(); 353 m_search_->textLength = text.length(); 354} 355 356SearchIterator::SearchIterator(CharacterIterator &text, 357 BreakIterator *breakiter) : 358 m_breakiterator_(breakiter) 359{ 360 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 361 m_search_->breakIter = NULL; 362 m_search_->isOverlap = FALSE; 363 m_search_->isCanonicalMatch = FALSE; 364 m_search_->isForwardSearching = TRUE; 365 m_search_->reset = TRUE; 366 m_search_->matchedIndex = USEARCH_DONE; 367 m_search_->matchedLength = 0; 368 text.getText(m_text_); 369 m_search_->text = m_text_.getBuffer(); 370 m_search_->textLength = m_text_.length(); 371 m_breakiterator_ = breakiter; 372} 373 374// protected methods ------------------------------------------------------ 375 376SearchIterator & SearchIterator::operator=(const SearchIterator &that) 377{ 378 if (this != &that) { 379 m_breakiterator_ = that.m_breakiterator_; 380 m_text_ = that.m_text_; 381 m_search_->breakIter = that.m_search_->breakIter; 382 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch; 383 m_search_->isOverlap = that.m_search_->isOverlap; 384 m_search_->matchedIndex = that.m_search_->matchedIndex; 385 m_search_->matchedLength = that.m_search_->matchedLength; 386 m_search_->text = that.m_search_->text; 387 m_search_->textLength = that.m_search_->textLength; 388 } 389 return *this; 390} 391 392void SearchIterator::setMatchLength(int32_t length) 393{ 394 m_search_->matchedLength = length; 395} 396 397void SearchIterator::setMatchStart(int32_t position) 398{ 399 m_search_->matchedIndex = position; 400} 401 402void SearchIterator::setMatchNotFound() 403{ 404 setMatchStart(USEARCH_DONE); 405 setMatchLength(0); 406 UErrorCode status = U_ZERO_ERROR; 407 // by default no errors should be returned here since offsets are within 408 // range. 409 if (m_search_->isForwardSearching) { 410 setOffset(m_search_->textLength, status); 411 } 412 else { 413 setOffset(0, status); 414 } 415} 416 417 418U_NAMESPACE_END 419 420#endif /* #if !UCONFIG_NO_COLLATION */ 421