1/* 2 ****************************************************************************** 3 * Copyright (C) 1996-2009, International Business Machines * 4 * Corporation and others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 8#include "unicode/utypes.h" 9 10#if !UCONFIG_NO_COLLATION 11 12#include "unicode/unistr.h" 13#include "unicode/putil.h" 14#include "unicode/usearch.h" 15 16#include "cmemory.h" 17#include "unicode/coll.h" 18#include "unicode/tblcoll.h" 19#include "unicode/coleitr.h" 20#include "unicode/ucoleitr.h" 21 22#include "unicode/regex.h" // TODO: make conditional on regexp being built. 23 24#include "unicode/uniset.h" 25#include "unicode/uset.h" 26#include "unicode/ustring.h" 27#include "hash.h" 28#include "uhash.h" 29#include "ucol_imp.h" 30#include "unormimp.h" 31 32#include "unicode/colldata.h" 33#include "unicode/bmsearch.h" 34 35U_NAMESPACE_BEGIN 36 37#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) 38#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) 39#define DELETE_ARRAY(array) uprv_free((void *) (array)) 40 41 42struct CEI 43{ 44 uint32_t order; 45 int32_t lowOffset; 46 int32_t highOffset; 47}; 48 49class Target : public UMemory 50{ 51public: 52 Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status); 53 ~Target(); 54 55 void setTargetString(const UnicodeString *target); 56 57 const CEI *nextCE(int32_t offset); 58 const CEI *prevCE(int32_t offset); 59 60 int32_t stringLength(); 61 UChar charAt(int32_t offset); 62 63 UBool isBreakBoundary(int32_t offset); 64 int32_t nextBreakBoundary(int32_t offset); 65 int32_t nextSafeBoundary(int32_t offset); 66 67 UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end); 68 69 void setOffset(int32_t offset); 70 void setLast(int32_t last); 71 int32_t getOffset(); 72 73private: 74 CEI *ceb; 75 int32_t bufferSize; 76 int32_t bufferMin; 77 int32_t bufferMax; 78 79 uint32_t strengthMask; 80 UCollationStrength strength; 81 uint32_t variableTop; 82 UBool toShift; 83 UCollator *coll; 84 85 const UnicodeString *targetString; 86 const UChar *targetBuffer; 87 int32_t targetLength; 88 89 UCollationElements *elements; 90 UBreakIterator *charBreakIterator; 91}; 92 93Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status) 94 : bufferSize(0), bufferMin(0), bufferMax(0), 95 strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator), 96 targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL) 97{ 98 strength = ucol_getStrength(coll); 99 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; 100 variableTop = ucol_getVariableTop(coll, &status); 101 102 // find the largest expansion 103 uint8_t maxExpansion = 0; 104 for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) { 105 if (*expansion > maxExpansion) { 106 maxExpansion = *expansion; 107 } 108 } 109 110 // room for an extra character on each end, plus 4 for safety 111 bufferSize = patternLength + (2 * maxExpansion) + 4; 112 113 ceb = NEW_ARRAY(CEI, bufferSize); 114 115 if (ceb == NULL) { 116 status = U_MEMORY_ALLOCATION_ERROR; 117 return; 118 } 119 120 if (target != NULL) { 121 setTargetString(target); 122 } 123 124 switch (strength) 125 { 126 default: 127 strengthMask |= UCOL_TERTIARYORDERMASK; 128 /* fall through */ 129 130 case UCOL_SECONDARY: 131 strengthMask |= UCOL_SECONDARYORDERMASK; 132 /* fall through */ 133 134 case UCOL_PRIMARY: 135 strengthMask |= UCOL_PRIMARYORDERMASK; 136 } 137} 138 139Target::~Target() 140{ 141 ubrk_close(charBreakIterator); 142 ucol_closeElements(elements); 143 144 DELETE_ARRAY(ceb); 145} 146 147void Target::setTargetString(const UnicodeString *target) 148{ 149 if (charBreakIterator != NULL) { 150 ubrk_close(charBreakIterator); 151 ucol_closeElements(elements); 152 } 153 154 targetString = target; 155 156 if (targetString != NULL) { 157 UErrorCode status = U_ZERO_ERROR; 158 159 targetBuffer = targetString->getBuffer(); 160 targetLength = targetString->length(); 161 162 elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status); 163 ucol_forceHanImplicit(elements, &status); 164 165 charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status), 166 targetBuffer, targetLength, &status); 167 } else { 168 targetBuffer = NULL; 169 targetLength = 0; 170 } 171} 172 173const CEI *Target::nextCE(int32_t offset) 174{ 175 UErrorCode status = U_ZERO_ERROR; 176 int32_t low = -1, high = -1; 177 uint32_t order; 178 UBool cont = FALSE; 179 180 if (offset >= bufferMin && offset < bufferMax) { 181 return &ceb[offset]; 182 } 183 184 if (bufferMax >= bufferSize || offset != bufferMax) { 185 return NULL; 186 } 187 188 do { 189 low = ucol_getOffset(elements); 190 order = ucol_next(elements, &status); 191 high = ucol_getOffset(elements); 192 193 if (order == (uint32_t)UCOL_NULLORDER) { 194 //high = low = -1; 195 break; 196 } 197 198 cont = isContinuation(order); 199 order &= strengthMask; 200 201 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { 202 if (strength >= UCOL_QUATERNARY) { 203 order &= UCOL_PRIMARYORDERMASK; 204 } else { 205 order = UCOL_IGNORABLE; 206 } 207 } 208 } while (order == UCOL_IGNORABLE); 209 210 if (cont) { 211 order |= UCOL_CONTINUATION_MARKER; 212 } 213 214 ceb[offset].order = order; 215 ceb[offset].lowOffset = low; 216 ceb[offset].highOffset = high; 217 218 bufferMax += 1; 219 220 return &ceb[offset]; 221} 222 223const CEI *Target::prevCE(int32_t offset) 224{ 225 UErrorCode status = U_ZERO_ERROR; 226 int32_t low = -1, high = -1; 227 uint32_t order; 228 UBool cont = FALSE; 229 230 if (offset >= bufferMin && offset < bufferMax) { 231 return &ceb[offset]; 232 } 233 234 if (bufferMax >= bufferSize || offset != bufferMax) { 235 return NULL; 236 } 237 238 do { 239 high = ucol_getOffset(elements); 240 order = ucol_previous(elements, &status); 241 low = ucol_getOffset(elements); 242 243 if (order == (uint32_t)UCOL_NULLORDER) { 244 break; 245 } 246 247 cont = isContinuation(order); 248 order &= strengthMask; 249 250 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { 251 if (strength >= UCOL_QUATERNARY) { 252 order &= UCOL_PRIMARYORDERMASK; 253 } else { 254 order = UCOL_IGNORABLE; 255 } 256 } 257 } while (order == UCOL_IGNORABLE); 258 259 bufferMax += 1; 260 261 if (cont) { 262 order |= UCOL_CONTINUATION_MARKER; 263 } 264 265 ceb[offset].order = order; 266 ceb[offset].lowOffset = low; 267 ceb[offset].highOffset = high; 268 269 return &ceb[offset]; 270} 271 272int32_t Target::stringLength() 273{ 274 if (targetString != NULL) { 275 return targetLength; 276 } 277 278 return 0; 279} 280 281UChar Target::charAt(int32_t offset) 282{ 283 if (targetString != NULL) { 284 return targetBuffer[offset]; 285 } 286 287 return 0x0000; 288} 289 290void Target::setOffset(int32_t offset) 291{ 292 UErrorCode status = U_ZERO_ERROR; 293 294 bufferMin = 0; 295 bufferMax = 0; 296 297 ucol_setOffset(elements, offset, &status); 298} 299 300void Target::setLast(int32_t last) 301{ 302 UErrorCode status = U_ZERO_ERROR; 303 304 bufferMin = 0; 305 bufferMax = 1; 306 307 ceb[0].order = UCOL_NULLORDER; 308 ceb[0].lowOffset = last; 309 ceb[0].highOffset = last; 310 311 ucol_setOffset(elements, last, &status); 312} 313 314int32_t Target::getOffset() 315{ 316 return ucol_getOffset(elements); 317} 318 319UBool Target::isBreakBoundary(int32_t offset) 320{ 321 return ubrk_isBoundary(charBreakIterator, offset); 322} 323 324int32_t Target::nextBreakBoundary(int32_t offset) 325{ 326 return ubrk_following(charBreakIterator, offset); 327} 328 329int32_t Target::nextSafeBoundary(int32_t offset) 330{ 331 while (offset < targetLength) { 332 //UChar ch = charAt(offset); 333 UChar ch = targetBuffer[offset]; 334 335 if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) { 336 return offset; 337 } 338 339 offset += 1; 340 } 341 342 return targetLength; 343} 344 345UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end) 346{ 347 if (strength < UCOL_IDENTICAL) { 348 return TRUE; 349 } 350 351 UChar t2[32], p2[32]; 352 const UChar *pBuffer = pattern.getBuffer(); 353 int32_t pLength = pattern.length(); 354 int32_t length = end - start; 355 356 UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR; 357 358 int32_t decomplength = unorm_decompose(t2, ARRAY_SIZE(t2), 359 targetBuffer + start, length, 360 FALSE, 0, &status); 361 362 // use separate status2 in case of buffer overflow 363 if (decomplength != unorm_decompose(p2, ARRAY_SIZE(p2), 364 pBuffer, pLength, 365 FALSE, 0, &status2)) { 366 return FALSE; // lengths are different 367 } 368 369 // compare contents 370 UChar *text, *pat; 371 372 if(U_SUCCESS(status)) { 373 text = t2; 374 pat = p2; 375 } else if(status == U_BUFFER_OVERFLOW_ERROR) { 376 status = U_ZERO_ERROR; 377 378 // allocate one buffer for both decompositions 379 text = NEW_ARRAY(UChar, decomplength * 2); 380 381 // Check for allocation failure. 382 if (text == NULL) { 383 return FALSE; 384 } 385 386 pat = text + decomplength; 387 388 unorm_decompose(text, decomplength, targetBuffer + start, 389 length, FALSE, 0, &status); 390 391 unorm_decompose(pat, decomplength, pBuffer, 392 pLength, FALSE, 0, &status); 393 } else { 394 // NFD failed, make sure that u_memcmp() does not overrun t2 & p2 395 // and that we don't uprv_free() an undefined text pointer 396 text = pat = t2; 397 decomplength = 0; 398 } 399 400 UBool result = (UBool)(u_memcmp(pat, text, decomplength) == 0); 401 402 if(text != t2) { 403 DELETE_ARRAY(text); 404 } 405 406 // return FALSE if NFD failed 407 return U_SUCCESS(status) && result; 408} 409 410#define HASH_TABLE_SIZE 257 411 412class BadCharacterTable : public UMemory 413{ 414public: 415 BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status); 416 ~BadCharacterTable(); 417 418 int32_t operator[](uint32_t ce) const; 419 int32_t getMaxSkip() const; 420 int32_t minLengthInChars(int32_t index); 421 422private: 423 static int32_t hash(uint32_t ce); 424 425 int32_t maxSkip; 426 int32_t badCharacterTable[HASH_TABLE_SIZE]; 427 428 int32_t *minLengthCache; 429}; 430 431BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status) 432 : minLengthCache(NULL) 433{ 434 int32_t plen = patternCEs.size(); 435 436 // **** need a better way to deal with this **** 437 if (U_FAILURE(status) || plen == 0) { 438 return; 439 } 440 441 int32_t *history = NEW_ARRAY(int32_t, plen); 442 443 if (history == NULL) { 444 status = U_MEMORY_ALLOCATION_ERROR; 445 return; 446 } 447 448 for (int32_t i = 0; i < plen; i += 1) { 449 history[i] = -1; 450 } 451 452 minLengthCache = NEW_ARRAY(int32_t, plen + 1); 453 454 if (minLengthCache == NULL) { 455 DELETE_ARRAY(history); 456 status = U_MEMORY_ALLOCATION_ERROR; 457 return; 458 } 459 460 maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history); 461 462 for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) { 463 badCharacterTable[j] = maxSkip; 464 } 465 466 for(int32_t p = 1; p < plen; p += 1) { 467 minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history); 468 469 // Make sure this entry is not bigger than the previous one. 470 // Otherwise, we might skip too far in some cases. 471 if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) { 472 minLengthCache[p] = minLengthCache[p - 1]; 473 } 474 } 475 476 minLengthCache[plen] = 0; 477 478 for(int32_t p = 0; p < plen - 1; p += 1) { 479 badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1]; 480 } 481 482 DELETE_ARRAY(history); 483} 484 485BadCharacterTable::~BadCharacterTable() 486{ 487 DELETE_ARRAY(minLengthCache); 488} 489 490int32_t BadCharacterTable::operator[](uint32_t ce) const 491{ 492 return badCharacterTable[hash(ce)]; 493} 494 495int32_t BadCharacterTable::getMaxSkip() const 496{ 497 return maxSkip; 498} 499 500int32_t BadCharacterTable::minLengthInChars(int32_t index) 501{ 502 return minLengthCache[index]; 503} 504 505int32_t BadCharacterTable::hash(uint32_t ce) 506{ 507 return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE; 508} 509 510class GoodSuffixTable : public UMemory 511{ 512public: 513 GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status); 514 ~GoodSuffixTable(); 515 516 int32_t operator[](int32_t offset) const; 517 518private: 519 int32_t *goodSuffixTable; 520}; 521 522GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status) 523 : goodSuffixTable(NULL) 524{ 525 int32_t patlen = patternCEs.size(); 526 527 // **** need a better way to deal with this **** 528 if (U_FAILURE(status) || patlen <= 0) { 529 return; 530 } 531 532 int32_t *suff = NEW_ARRAY(int32_t, patlen); 533 int32_t start = patlen - 1, end = - 1; 534 int32_t maxSkip = badCharacterTable.getMaxSkip(); 535 536 if (suff == NULL) { 537 status = U_MEMORY_ALLOCATION_ERROR; 538 return; 539 } 540 541 // initialze suff 542 suff[patlen - 1] = patlen; 543 544 for (int32_t i = patlen - 2; i >= 0; i -= 1) { 545 // (i > start) means we're inside the last suffix match we found 546 // ((patlen - 1) - end) is how far the end of that match is from end of pattern 547 // (i - start) is how far we are from start of that match 548 // (i + (patlen - 1) - end) is index of same character at end of pattern 549 // so if any suffix match at that character doesn't extend beyond the last match, 550 // it's the suffix for this character as well 551 if (i > start && suff[i + patlen - 1 - end] < i - start) { 552 suff[i] = suff[i + patlen - 1 - end]; 553 } else { 554 start = end = i; 555 556 int32_t s = patlen; 557 558 while (start >= 0 && patternCEs[start] == patternCEs[--s]) { 559 start -= 1; 560 } 561 562 suff[i] = end - start; 563 } 564 } 565 566 // now build goodSuffixTable 567 goodSuffixTable = NEW_ARRAY(int32_t, patlen); 568 569 if (goodSuffixTable == NULL) { 570 DELETE_ARRAY(suff); 571 status = U_MEMORY_ALLOCATION_ERROR; 572 return; 573 } 574 575 576 // initialize entries to minLengthInChars of the pattern 577 for (int32_t i = 0; i < patlen; i += 1) { 578 goodSuffixTable[i] = maxSkip; 579 } 580 581 int32_t prefix = 0; 582 583 for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) { 584 if (suff[i] == i + 1) { 585 // this matching suffix is a prefix of the pattern 586 int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1); 587 588 // for any mis-match before this suffix, we should skip 589 // so that the front of the pattern (i.e. the prefix) 590 // lines up with the front of the suffix. 591 // (patlen - 1 - i) is the start of the suffix 592 while (prefix < patlen - 1 - i) { 593 // value of maxSkip means never set... 594 if (goodSuffixTable[prefix] == maxSkip) { 595 goodSuffixTable[prefix] = prefixSkip; 596 } 597 598 prefix += 1; 599 } 600 } 601 } 602 603 for (int32_t i = 0; i < patlen - 1; i += 1) { 604 goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1); 605 } 606 607 DELETE_ARRAY(suff); 608} 609 610GoodSuffixTable::~GoodSuffixTable() 611{ 612 DELETE_ARRAY(goodSuffixTable); 613} 614 615int32_t GoodSuffixTable::operator[](int32_t offset) const 616{ 617 return goodSuffixTable[offset]; 618} 619 620UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch) 621 622 623UBool BoyerMooreSearch::empty() 624{ 625 return patCEs->size() <= 0; 626} 627 628CollData *BoyerMooreSearch::getData() 629{ 630 return data; 631} 632 633CEList *BoyerMooreSearch::getPatternCEs() 634{ 635 return patCEs; 636} 637 638BadCharacterTable *BoyerMooreSearch::getBadCharacterTable() 639{ 640 return badCharacterTable; 641} 642 643GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable() 644{ 645 return goodSuffixTable; 646} 647 648BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, 649 UErrorCode &status) 650 : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL) 651{ 652 653 if (U_FAILURE(status)) { 654 return; 655 } 656 657 UCollator *collator = data->getCollator(); 658 659 patCEs = new CEList(collator, patternString, status); 660 661 if (patCEs == NULL || U_FAILURE(status)) { 662 return; 663 } 664 665 badCharacterTable = new BadCharacterTable(*patCEs, data, status); 666 667 if (badCharacterTable == NULL || U_FAILURE(status)) { 668 return; 669 } 670 671 goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status); 672 673 if (targetString != NULL) { 674 target = new Target(collator, targetString, patCEs->size(), status); 675 } 676} 677 678BoyerMooreSearch::~BoyerMooreSearch() 679{ 680 delete target; 681 delete goodSuffixTable; 682 delete badCharacterTable; 683 delete patCEs; 684} 685 686void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status) 687{ 688 if (U_FAILURE(status)) { 689 return; 690 } 691 692 if (target == NULL) { 693 target = new Target(data->getCollator(), targetString, patCEs->size(), status); 694 } else { 695 target->setTargetString(targetString); 696 } 697} 698 699// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. **** 700/* 701 * TODO: 702 * * deal with trailing (and leading?) ignorables. 703 * * Adding BoyerMooreSearch object slowed it down. How can we speed it up? 704 */ 705UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end) 706{ 707 /*UCollator *coll =*/ data->getCollator(); 708 int32_t plen = patCEs->size(); 709 int32_t tlen = target->stringLength(); 710 int32_t maxSkip = badCharacterTable->getMaxSkip(); 711 int32_t tOffset = offset + maxSkip; 712 713 if (plen <= 0) { 714 // Searching for a zero length pattern always fails. 715 start = end = -1; 716 return FALSE; 717 } 718 719 while (tOffset <= tlen) { 720 int32_t pIndex = plen - 1; 721 int32_t tIndex = 0; 722 int32_t lIndex = 0; 723 724 if (tOffset < tlen) { 725 // **** we really want to skip ahead enough to **** 726 // **** be sure we get at least 1 non-ignorable **** 727 // **** CE after the end of the pattern. **** 728 int32_t next = target->nextSafeBoundary(tOffset + 1); 729 730 target->setOffset(next); 731 732 for (lIndex = 0; ; lIndex += 1) { 733 const CEI *cei = target->prevCE(lIndex); 734 int32_t low = cei->lowOffset; 735 int32_t high = cei->highOffset; 736 737 if (high == 0 || (low < high && low <= tOffset)) { 738 if (low < tOffset) { 739 while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) { 740 lIndex -= 1; 741 } 742 743 if (high > tOffset) { 744 tOffset = high; 745 } 746 } 747 748 break; 749 } 750 } 751 } else { 752 target->setLast(tOffset); 753 lIndex = 0; 754 } 755 756 tIndex = ++lIndex; 757 758 // Iterate backward until we hit the beginning of the pattern 759 while (pIndex >= 0) { 760 uint32_t pce = (*patCEs)[pIndex]; 761 const CEI *tcei = target->prevCE(tIndex++); 762 763 764 if (tcei->order != pce) { 765 // There is a mismatch at this position. Decide how far 766 // over to shift the pattern, then try again. 767 768 int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex]; 769#ifdef EXTRA_CAUTIOUS 770 int32_t old = tOffset; 771#endif 772 773 tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1); 774 775 if (gsOffset > tOffset) { 776 tOffset = gsOffset; 777 } 778 779#ifdef EXTRA_CAUTIOUS 780 // Make sure we don't skip backwards... 781 if (tOffset <= old) { 782 tOffset = old + 1; 783 } 784#endif 785 786 break; 787 } 788 789 pIndex -= 1; 790 } 791 792 if (pIndex < 0) { 793 // We made it back to the beginning of the pattern, 794 // which means we matched it all. Return the location. 795 const CEI firstCEI = *target->prevCE(tIndex - 1); 796 const CEI lastCEI = *target->prevCE(lIndex); 797 int32_t mStart = firstCEI.lowOffset; 798 int32_t minLimit = lastCEI.lowOffset; 799 int32_t maxLimit = lastCEI.highOffset; 800 int32_t mLimit; 801 UBool found = TRUE; 802 803 target->setOffset(/*tOffset*/maxLimit); 804 805 const CEI nextCEI = *target->nextCE(0); 806 807 if (nextCEI.lowOffset > maxLimit) { 808 maxLimit = nextCEI.lowOffset; 809 } 810 811 if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) { 812 found = FALSE; 813 } 814 815 if (! target->isBreakBoundary(mStart)) { 816 found = FALSE; 817 } 818 819 if (firstCEI.lowOffset == firstCEI.highOffset) { 820 found = FALSE; 821 } 822 823 mLimit = maxLimit; 824 if (minLimit < maxLimit) { 825 int32_t nbb = target->nextBreakBoundary(minLimit); 826 827 if (nbb >= lastCEI.highOffset) { 828 mLimit = nbb; 829 } 830 } 831 832 if (mLimit > maxLimit) { 833 found = FALSE; 834 } 835 836 if (! target->isBreakBoundary(mLimit)) { 837 found = FALSE; 838 } 839 840 if (! target->isIdentical(pattern, mStart, mLimit)) { 841 found = FALSE; 842 } 843 844 if (found) { 845 start = mStart; 846 end = mLimit; 847 848 return TRUE; 849 } 850 851 tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip? 852 } 853 // Otherwise, we're here because of a mismatch, so keep going.... 854 } 855 856 // no match 857 start = -1; 858 end = -1; 859 return FALSE; 860} 861 862U_NAMESPACE_END 863 864#endif // #if !UCONFIG_NO_COLLATION 865