1/*
2 ******************************************************************************
3 *   Copyright (C) 1996-2009, International Business Machines                 *
4 *   Corporation and others.  All Rights Reserved.                            *
5 ******************************************************************************
6 */
7
8#include "unicode/utypes.h"
9
10#if !UCONFIG_NO_COLLATION
11
12#include "unicode/unistr.h"
13#include "unicode/putil.h"
14#include "unicode/usearch.h"
15
16#include "cmemory.h"
17#include "unicode/coll.h"
18#include "unicode/tblcoll.h"
19#include "unicode/coleitr.h"
20#include "unicode/ucoleitr.h"
21
22#include "unicode/regex.h"        // TODO: make conditional on regexp being built.
23
24#include "unicode/uniset.h"
25#include "unicode/uset.h"
26#include "unicode/ustring.h"
27#include "hash.h"
28#include "uhash.h"
29#include "ucol_imp.h"
30#include "unormimp.h"
31
32#include "unicode/colldata.h"
33#include "unicode/bmsearch.h"
34
35U_NAMESPACE_BEGIN
36
37#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
38#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
39#define DELETE_ARRAY(array) uprv_free((void *) (array))
40
41
42struct CEI
43{
44    uint32_t order;
45    int32_t  lowOffset;
46    int32_t  highOffset;
47};
48
49class Target : public UMemory
50{
51public:
52    Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status);
53    ~Target();
54
55    void setTargetString(const UnicodeString *target);
56
57    const CEI *nextCE(int32_t offset);
58    const CEI *prevCE(int32_t offset);
59
60    int32_t stringLength();
61    UChar charAt(int32_t offset);
62
63    UBool isBreakBoundary(int32_t offset);
64    int32_t nextBreakBoundary(int32_t offset);
65    int32_t nextSafeBoundary(int32_t offset);
66
67    UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);
68
69    void setOffset(int32_t offset);
70    void setLast(int32_t last);
71    int32_t getOffset();
72
73private:
74    CEI *ceb;
75    int32_t bufferSize;
76    int32_t bufferMin;
77    int32_t bufferMax;
78
79    uint32_t strengthMask;
80    UCollationStrength strength;
81    uint32_t variableTop;
82    UBool toShift;
83    UCollator *coll;
84
85    const UnicodeString *targetString;
86    const UChar *targetBuffer;
87    int32_t targetLength;
88
89    UCollationElements *elements;
90    UBreakIterator *charBreakIterator;
91};
92
93Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
94    : bufferSize(0), bufferMin(0), bufferMax(0),
95      strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
96      targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
97{
98    strength = ucol_getStrength(coll);
99    toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==  UCOL_SHIFTED;
100    variableTop = ucol_getVariableTop(coll, &status);
101
102    // find the largest expansion
103    uint8_t maxExpansion = 0;
104    for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) {
105        if (*expansion > maxExpansion) {
106            maxExpansion = *expansion;
107        }
108    }
109
110    // room for an extra character on each end, plus 4 for safety
111    bufferSize = patternLength + (2 * maxExpansion) + 4;
112
113    ceb = NEW_ARRAY(CEI, bufferSize);
114
115    if (ceb == NULL) {
116        status = U_MEMORY_ALLOCATION_ERROR;
117        return;
118    }
119
120    if (target != NULL) {
121        setTargetString(target);
122    }
123
124    switch (strength)
125    {
126    default:
127        strengthMask |= UCOL_TERTIARYORDERMASK;
128        /* fall through */
129
130    case UCOL_SECONDARY:
131        strengthMask |= UCOL_SECONDARYORDERMASK;
132        /* fall through */
133
134    case UCOL_PRIMARY:
135        strengthMask |= UCOL_PRIMARYORDERMASK;
136    }
137}
138
139Target::~Target()
140{
141    ubrk_close(charBreakIterator);
142    ucol_closeElements(elements);
143
144    DELETE_ARRAY(ceb);
145}
146
147void Target::setTargetString(const UnicodeString *target)
148{
149    if (charBreakIterator != NULL) {
150        ubrk_close(charBreakIterator);
151        ucol_closeElements(elements);
152    }
153
154    targetString = target;
155
156    if (targetString != NULL) {
157        UErrorCode status = U_ZERO_ERROR;
158
159        targetBuffer = targetString->getBuffer();
160        targetLength = targetString->length();
161
162        elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);
163        ucol_forceHanImplicit(elements, &status);
164
165        charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
166                                      targetBuffer, targetLength, &status);
167    } else {
168        targetBuffer = NULL;
169        targetLength = 0;
170    }
171}
172
173const CEI *Target::nextCE(int32_t offset)
174{
175    UErrorCode status = U_ZERO_ERROR;
176    int32_t low = -1, high = -1;
177    uint32_t order;
178    UBool cont = FALSE;
179
180    if (offset >= bufferMin && offset < bufferMax) {
181        return &ceb[offset];
182    }
183
184    if (bufferMax >= bufferSize || offset != bufferMax) {
185        return NULL;
186    }
187
188    do {
189        low   = ucol_getOffset(elements);
190        order = ucol_next(elements, &status);
191        high  = ucol_getOffset(elements);
192
193        if (order == (uint32_t)UCOL_NULLORDER) {
194          //high = low = -1;
195            break;
196        }
197
198        cont = isContinuation(order);
199        order &= strengthMask;
200
201        if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
202            if (strength >= UCOL_QUATERNARY) {
203                order &= UCOL_PRIMARYORDERMASK;
204            } else {
205                order = UCOL_IGNORABLE;
206            }
207        }
208    } while (order == UCOL_IGNORABLE);
209
210    if (cont) {
211        order |= UCOL_CONTINUATION_MARKER;
212    }
213
214    ceb[offset].order = order;
215    ceb[offset].lowOffset = low;
216    ceb[offset].highOffset = high;
217
218    bufferMax += 1;
219
220    return &ceb[offset];
221}
222
223const CEI *Target::prevCE(int32_t offset)
224{
225    UErrorCode status = U_ZERO_ERROR;
226    int32_t low = -1, high = -1;
227    uint32_t order;
228    UBool cont = FALSE;
229
230    if (offset >= bufferMin && offset < bufferMax) {
231        return &ceb[offset];
232    }
233
234    if (bufferMax >= bufferSize || offset != bufferMax) {
235        return NULL;
236    }
237
238    do {
239        high  = ucol_getOffset(elements);
240        order = ucol_previous(elements, &status);
241        low   = ucol_getOffset(elements);
242
243        if (order == (uint32_t)UCOL_NULLORDER) {
244            break;
245        }
246
247        cont = isContinuation(order);
248        order &= strengthMask;
249
250        if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
251            if (strength >= UCOL_QUATERNARY) {
252                order &= UCOL_PRIMARYORDERMASK;
253            } else {
254                order = UCOL_IGNORABLE;
255            }
256        }
257    } while (order == UCOL_IGNORABLE);
258
259    bufferMax += 1;
260
261    if (cont) {
262        order |= UCOL_CONTINUATION_MARKER;
263    }
264
265    ceb[offset].order       = order;
266    ceb[offset].lowOffset   = low;
267    ceb[offset].highOffset = high;
268
269    return &ceb[offset];
270}
271
272int32_t Target::stringLength()
273{
274    if (targetString != NULL) {
275        return targetLength;
276    }
277
278    return 0;
279}
280
281UChar Target::charAt(int32_t offset)
282{
283    if (targetString != NULL) {
284        return targetBuffer[offset];
285    }
286
287    return 0x0000;
288}
289
290void Target::setOffset(int32_t offset)
291{
292    UErrorCode status = U_ZERO_ERROR;
293
294    bufferMin = 0;
295    bufferMax = 0;
296
297    ucol_setOffset(elements, offset, &status);
298}
299
300void Target::setLast(int32_t last)
301{
302    UErrorCode status = U_ZERO_ERROR;
303
304    bufferMin = 0;
305    bufferMax = 1;
306
307    ceb[0].order      = UCOL_NULLORDER;
308    ceb[0].lowOffset  = last;
309    ceb[0].highOffset = last;
310
311    ucol_setOffset(elements, last, &status);
312}
313
314int32_t Target::getOffset()
315{
316    return ucol_getOffset(elements);
317}
318
319UBool Target::isBreakBoundary(int32_t offset)
320{
321    return ubrk_isBoundary(charBreakIterator, offset);
322}
323
324int32_t Target::nextBreakBoundary(int32_t offset)
325{
326    return ubrk_following(charBreakIterator, offset);
327}
328
329int32_t Target::nextSafeBoundary(int32_t offset)
330{
331    while (offset < targetLength) {
332      //UChar ch = charAt(offset);
333        UChar ch = targetBuffer[offset];
334
335        if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) {
336            return offset;
337        }
338
339        offset += 1;
340    }
341
342    return targetLength;
343}
344
345UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
346{
347    if (strength < UCOL_IDENTICAL) {
348        return TRUE;
349    }
350
351    UChar t2[32], p2[32];
352    const UChar *pBuffer = pattern.getBuffer();
353    int32_t pLength = pattern.length();
354    int32_t length = end - start;
355
356    UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
357
358    int32_t decomplength = unorm_decompose(t2, ARRAY_SIZE(t2),
359                                       targetBuffer + start, length,
360                                       FALSE, 0, &status);
361
362    // use separate status2 in case of buffer overflow
363    if (decomplength != unorm_decompose(p2, ARRAY_SIZE(p2),
364                                        pBuffer, pLength,
365                                        FALSE, 0, &status2)) {
366        return FALSE; // lengths are different
367    }
368
369    // compare contents
370    UChar *text, *pat;
371
372    if(U_SUCCESS(status)) {
373        text = t2;
374        pat = p2;
375    } else if(status == U_BUFFER_OVERFLOW_ERROR) {
376        status = U_ZERO_ERROR;
377
378        // allocate one buffer for both decompositions
379        text = NEW_ARRAY(UChar, decomplength * 2);
380
381        // Check for allocation failure.
382        if (text == NULL) {
383        	return FALSE;
384        }
385
386        pat = text + decomplength;
387
388        unorm_decompose(text, decomplength, targetBuffer + start,
389                        length, FALSE, 0, &status);
390
391        unorm_decompose(pat, decomplength, pBuffer,
392                        pLength, FALSE, 0, &status);
393    } else {
394        // NFD failed, make sure that u_memcmp() does not overrun t2 & p2
395        // and that we don't uprv_free() an undefined text pointer
396        text = pat = t2;
397        decomplength = 0;
398    }
399
400    UBool result = (UBool)(u_memcmp(pat, text, decomplength) == 0);
401
402    if(text != t2) {
403        DELETE_ARRAY(text);
404    }
405
406    // return FALSE if NFD failed
407    return U_SUCCESS(status) && result;
408}
409
410#define HASH_TABLE_SIZE 257
411
412class BadCharacterTable : public UMemory
413{
414public:
415    BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);
416    ~BadCharacterTable();
417
418    int32_t operator[](uint32_t ce) const;
419    int32_t getMaxSkip() const;
420    int32_t minLengthInChars(int32_t index);
421
422private:
423    static int32_t hash(uint32_t ce);
424
425    int32_t maxSkip;
426    int32_t badCharacterTable[HASH_TABLE_SIZE];
427
428    int32_t *minLengthCache;
429};
430
431BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)
432    : minLengthCache(NULL)
433{
434    int32_t plen = patternCEs.size();
435
436    // **** need a better way to deal with this ****
437    if (U_FAILURE(status) || plen == 0) {
438        return;
439    }
440
441    int32_t *history = NEW_ARRAY(int32_t, plen);
442
443    if (history == NULL) {
444        status = U_MEMORY_ALLOCATION_ERROR;
445        return;
446    }
447
448    for (int32_t i = 0; i < plen; i += 1) {
449        history[i] = -1;
450    }
451
452    minLengthCache = NEW_ARRAY(int32_t, plen + 1);
453
454    if (minLengthCache == NULL) {
455        DELETE_ARRAY(history);
456        status = U_MEMORY_ALLOCATION_ERROR;
457        return;
458    }
459
460    maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);
461
462    for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
463        badCharacterTable[j] = maxSkip;
464    }
465
466    for(int32_t p = 1; p < plen; p += 1) {
467        minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);
468
469        // Make sure this entry is not bigger than the previous one.
470        // Otherwise, we might skip too far in some cases.
471        if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) {
472            minLengthCache[p] = minLengthCache[p - 1];
473        }
474    }
475
476    minLengthCache[plen] = 0;
477
478    for(int32_t p = 0; p < plen - 1; p += 1) {
479        badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];
480    }
481
482    DELETE_ARRAY(history);
483}
484
485BadCharacterTable::~BadCharacterTable()
486{
487    DELETE_ARRAY(minLengthCache);
488}
489
490int32_t BadCharacterTable::operator[](uint32_t ce) const
491{
492    return badCharacterTable[hash(ce)];
493}
494
495int32_t BadCharacterTable::getMaxSkip() const
496{
497    return maxSkip;
498}
499
500int32_t BadCharacterTable::minLengthInChars(int32_t index)
501{
502    return minLengthCache[index];
503}
504
505int32_t BadCharacterTable::hash(uint32_t ce)
506{
507    return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE;
508}
509
510class GoodSuffixTable : public UMemory
511{
512public:
513    GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
514    ~GoodSuffixTable();
515
516    int32_t operator[](int32_t offset) const;
517
518private:
519    int32_t *goodSuffixTable;
520};
521
522GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)
523    : goodSuffixTable(NULL)
524{
525    int32_t patlen = patternCEs.size();
526
527    // **** need a better way to deal with this ****
528    if (U_FAILURE(status) || patlen <= 0) {
529        return;
530    }
531
532    int32_t *suff  = NEW_ARRAY(int32_t, patlen);
533    int32_t start = patlen - 1, end = - 1;
534    int32_t maxSkip = badCharacterTable.getMaxSkip();
535
536    if (suff == NULL) {
537        status = U_MEMORY_ALLOCATION_ERROR;
538        return;
539    }
540
541    // initialze suff
542    suff[patlen - 1] = patlen;
543
544    for (int32_t i = patlen - 2; i >= 0; i -= 1) {
545        // (i > start) means we're inside the last suffix match we found
546        // ((patlen - 1) - end) is how far the end of that match is from end of pattern
547        // (i - start) is how far we are from start of that match
548        // (i + (patlen - 1) - end) is index of same character at end of pattern
549        // so if any suffix match at that character doesn't extend beyond the last match,
550        // it's the suffix for this character as well
551        if (i > start && suff[i + patlen - 1 - end] < i - start) {
552            suff[i] = suff[i + patlen - 1 - end];
553        } else {
554            start = end = i;
555
556            int32_t s = patlen;
557
558            while (start >= 0 && patternCEs[start] == patternCEs[--s]) {
559                start -= 1;
560            }
561
562            suff[i] = end - start;
563        }
564    }
565
566    // now build goodSuffixTable
567    goodSuffixTable  = NEW_ARRAY(int32_t, patlen);
568
569    if (goodSuffixTable == NULL) {
570        DELETE_ARRAY(suff);
571        status = U_MEMORY_ALLOCATION_ERROR;
572        return;
573    }
574
575
576    // initialize entries to minLengthInChars of the pattern
577    for (int32_t i = 0; i < patlen; i += 1) {
578        goodSuffixTable[i] = maxSkip;
579    }
580
581    int32_t prefix = 0;
582
583    for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) {
584        if (suff[i] == i + 1) {
585            // this matching suffix is a prefix of the pattern
586            int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);
587
588            // for any mis-match before this suffix, we should skip
589            // so that the front of the pattern (i.e. the prefix)
590            // lines up with the front of the suffix.
591            // (patlen - 1 - i) is the start of the suffix
592            while (prefix < patlen - 1 - i) {
593                // value of maxSkip means never set...
594                if (goodSuffixTable[prefix] == maxSkip) {
595                    goodSuffixTable[prefix] = prefixSkip;
596                }
597
598                prefix += 1;
599            }
600        }
601    }
602
603    for (int32_t i = 0; i < patlen - 1; i += 1) {
604        goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1);
605    }
606
607    DELETE_ARRAY(suff);
608}
609
610GoodSuffixTable::~GoodSuffixTable()
611{
612    DELETE_ARRAY(goodSuffixTable);
613}
614
615int32_t GoodSuffixTable::operator[](int32_t offset) const
616{
617    return goodSuffixTable[offset];
618}
619
620UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)
621
622
623UBool BoyerMooreSearch::empty()
624{
625    return patCEs->size() <= 0;
626}
627
628CollData *BoyerMooreSearch::getData()
629{
630    return data;
631}
632
633CEList *BoyerMooreSearch::getPatternCEs()
634{
635    return patCEs;
636}
637
638BadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
639{
640    return badCharacterTable;
641}
642
643GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
644{
645    return goodSuffixTable;
646}
647
648BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
649                                   UErrorCode &status)
650    : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
651{
652
653    if (U_FAILURE(status)) {
654        return;
655    }
656
657    UCollator *collator = data->getCollator();
658
659    patCEs = new CEList(collator, patternString, status);
660
661    if (patCEs == NULL || U_FAILURE(status)) {
662        return;
663    }
664
665    badCharacterTable = new BadCharacterTable(*patCEs, data, status);
666
667    if (badCharacterTable == NULL || U_FAILURE(status)) {
668        return;
669    }
670
671    goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);
672
673    if (targetString != NULL) {
674        target = new Target(collator, targetString, patCEs->size(), status);
675    }
676}
677
678BoyerMooreSearch::~BoyerMooreSearch()
679{
680    delete target;
681    delete goodSuffixTable;
682    delete badCharacterTable;
683    delete patCEs;
684}
685
686void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
687{
688    if (U_FAILURE(status)) {
689        return;
690    }
691
692    if (target == NULL) {
693        target = new Target(data->getCollator(), targetString, patCEs->size(), status);
694    } else {
695        target->setTargetString(targetString);
696    }
697}
698
699// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. ****
700/*
701 * TODO:
702 *  * deal with trailing (and leading?) ignorables.
703 *  * Adding BoyerMooreSearch object slowed it down. How can we speed it up?
704 */
705UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)
706{
707    /*UCollator *coll =*/ data->getCollator();
708    int32_t plen = patCEs->size();
709    int32_t tlen = target->stringLength();
710    int32_t maxSkip = badCharacterTable->getMaxSkip();
711    int32_t tOffset = offset + maxSkip;
712
713    if (plen <= 0) {
714        // Searching for a zero length pattern always fails.
715        start = end = -1;
716        return FALSE;
717    }
718
719    while (tOffset <= tlen) {
720        int32_t pIndex = plen - 1;
721        int32_t tIndex = 0;
722        int32_t lIndex = 0;
723
724        if (tOffset < tlen) {
725            // **** we really want to skip ahead enough to  ****
726            // **** be sure we get at least 1 non-ignorable ****
727            // **** CE after the end of the pattern.        ****
728            int32_t next = target->nextSafeBoundary(tOffset + 1);
729
730            target->setOffset(next);
731
732            for (lIndex = 0; ; lIndex += 1) {
733                const CEI *cei = target->prevCE(lIndex);
734                int32_t low = cei->lowOffset;
735                int32_t high = cei->highOffset;
736
737                if (high == 0 || (low < high && low <= tOffset)) {
738                    if (low < tOffset) {
739                        while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {
740                            lIndex -= 1;
741                        }
742
743                        if (high > tOffset) {
744                            tOffset = high;
745                        }
746                    }
747
748                    break;
749                }
750            }
751        } else {
752            target->setLast(tOffset);
753            lIndex = 0;
754        }
755
756        tIndex = ++lIndex;
757
758        // Iterate backward until we hit the beginning of the pattern
759        while (pIndex >= 0) {
760            uint32_t pce = (*patCEs)[pIndex];
761            const CEI *tcei = target->prevCE(tIndex++);
762
763
764            if (tcei->order != pce) {
765                // There is a mismatch at this position.  Decide how far
766                // over to shift the pattern, then try again.
767
768                int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];
769#ifdef EXTRA_CAUTIOUS
770                int32_t old = tOffset;
771#endif
772
773                tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);
774
775                if (gsOffset > tOffset) {
776                    tOffset = gsOffset;
777                }
778
779#ifdef EXTRA_CAUTIOUS
780                // Make sure we don't skip backwards...
781                if (tOffset <= old) {
782                    tOffset = old + 1;
783                }
784#endif
785
786                break;
787            }
788
789            pIndex -= 1;
790        }
791
792        if (pIndex < 0) {
793            // We made it back to the beginning of the pattern,
794            // which means we matched it all.  Return the location.
795            const CEI firstCEI = *target->prevCE(tIndex - 1);
796            const CEI lastCEI  = *target->prevCE(lIndex);
797            int32_t mStart   = firstCEI.lowOffset;
798            int32_t minLimit = lastCEI.lowOffset;
799            int32_t maxLimit = lastCEI.highOffset;
800            int32_t mLimit;
801            UBool found = TRUE;
802
803            target->setOffset(/*tOffset*/maxLimit);
804
805            const CEI nextCEI = *target->nextCE(0);
806
807            if (nextCEI.lowOffset > maxLimit) {
808                maxLimit = nextCEI.lowOffset;
809            }
810
811            if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) {
812                found = FALSE;
813            }
814
815            if (! target->isBreakBoundary(mStart)) {
816                found = FALSE;
817            }
818
819            if (firstCEI.lowOffset == firstCEI.highOffset) {
820                found = FALSE;
821            }
822
823            mLimit = maxLimit;
824            if (minLimit < maxLimit) {
825                int32_t nbb = target->nextBreakBoundary(minLimit);
826
827                if (nbb >= lastCEI.highOffset) {
828                    mLimit = nbb;
829                }
830            }
831
832            if (mLimit > maxLimit) {
833                found = FALSE;
834            }
835
836            if (! target->isBreakBoundary(mLimit)) {
837                found = FALSE;
838            }
839
840            if (! target->isIdentical(pattern, mStart, mLimit)) {
841                found = FALSE;
842            }
843
844            if (found) {
845                start = mStart;
846                end   = mLimit;
847
848                return TRUE;
849            }
850
851            tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?
852        }
853        // Otherwise, we're here because of a mismatch, so keep going....
854    }
855
856    // no match
857   start = -1;
858   end = -1;
859   return FALSE;
860}
861
862U_NAMESPACE_END
863
864#endif // #if !UCONFIG_NO_COLLATION
865