1c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott/*
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ******************************************************************************
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *   Copyright (C) 1996-2009, International Business Machines                 *
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *   Corporation and others.  All Rights Reserved.                            *
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ******************************************************************************
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott */
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/utypes.h"
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#if !UCONFIG_NO_COLLATION
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/unistr.h"
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/putil.h"
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/usearch.h"
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "cmemory.h"
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/coll.h"
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/tblcoll.h"
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/coleitr.h"
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/ucoleitr.h"
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/regex.h"        // TODO: make conditional on regexp being built.
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/uniset.h"
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/uset.h"
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/ustring.h"
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "hash.h"
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "uhash.h"
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "ucol_imp.h"
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unormimp.h"
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/colldata.h"
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "unicode/bmsearch.h"
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottU_NAMESPACE_BEGIN
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define DELETE_ARRAY(array) uprv_free((void *) (array))
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstruct CEI
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    uint32_t order;
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t  lowOffset;
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t  highOffset;
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass Target : public UMemory
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottpublic:
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status);
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ~Target();
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    void setTargetString(const UnicodeString *target);
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const CEI *nextCE(int32_t offset);
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const CEI *prevCE(int32_t offset);
59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t stringLength();
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UChar charAt(int32_t offset);
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UBool isBreakBoundary(int32_t offset);
64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t nextBreakBoundary(int32_t offset);
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t nextSafeBoundary(int32_t offset);
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end);
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    void setOffset(int32_t offset);
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    void setLast(int32_t last);
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t getOffset();
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottprivate:
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    CEI *ceb;
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t bufferSize;
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t bufferMin;
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t bufferMax;
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    uint32_t strengthMask;
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UCollationStrength strength;
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    uint32_t variableTop;
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UBool toShift;
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UCollator *coll;
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const UnicodeString *targetString;
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const UChar *targetBuffer;
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t targetLength;
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UCollationElements *elements;
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UBreakIterator *charBreakIterator;
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTarget::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status)
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    : bufferSize(0), bufferMin(0), bufferMax(0),
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator),
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL)
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    strength = ucol_getStrength(coll);
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) ==  UCOL_SHIFTED;
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    variableTop = ucol_getVariableTop(coll, &status);
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // find the largest expansion
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    uint8_t maxExpansion = 0;
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) {
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (*expansion > maxExpansion) {
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            maxExpansion = *expansion;
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // room for an extra character on each end, plus 4 for safety
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bufferSize = patternLength + (2 * maxExpansion) + 4;
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb = NEW_ARRAY(CEI, bufferSize);
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (ceb == NULL) {
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        status = U_MEMORY_ALLOCATION_ERROR;
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (target != NULL) {
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        setTargetString(target);
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    switch (strength)
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    {
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    default:
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        strengthMask |= UCOL_TERTIARYORDERMASK;
128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        /* fall through */
129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    case UCOL_SECONDARY:
131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        strengthMask |= UCOL_SECONDARYORDERMASK;
132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        /* fall through */
133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    case UCOL_PRIMARY:
135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        strengthMask |= UCOL_PRIMARYORDERMASK;
136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottTarget::~Target()
140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ubrk_close(charBreakIterator);
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ucol_closeElements(elements);
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DELETE_ARRAY(ceb);
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid Target::setTargetString(const UnicodeString *target)
148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (charBreakIterator != NULL) {
150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        ubrk_close(charBreakIterator);
151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        ucol_closeElements(elements);
152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    targetString = target;
155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (targetString != NULL) {
157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        UErrorCode status = U_ZERO_ERROR;
158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        targetBuffer = targetString->getBuffer();
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        targetLength = targetString->length();
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status);
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        ucol_forceHanImplicit(elements, &status);
164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                      targetBuffer, targetLength, &status);
167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else {
168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        targetBuffer = NULL;
169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        targetLength = 0;
170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
172c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottconst CEI *Target::nextCE(int32_t offset)
174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UErrorCode status = U_ZERO_ERROR;
176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t low = -1, high = -1;
177c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    uint32_t order;
178c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UBool cont = FALSE;
179c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
180c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (offset >= bufferMin && offset < bufferMax) {
181c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return &ceb[offset];
182c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
183c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
184c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (bufferMax >= bufferSize || offset != bufferMax) {
185c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return NULL;
186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    do {
189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        low   = ucol_getOffset(elements);
190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        order = ucol_next(elements, &status);
191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        high  = ucol_getOffset(elements);
192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (order == (uint32_t)UCOL_NULLORDER) {
194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          //high = low = -1;
195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            break;
196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        cont = isContinuation(order);
199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        order &= strengthMask;
200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (strength >= UCOL_QUATERNARY) {
203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                order &= UCOL_PRIMARYORDERMASK;
204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            } else {
205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                order = UCOL_IGNORABLE;
206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } while (order == UCOL_IGNORABLE);
209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (cont) {
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        order |= UCOL_CONTINUATION_MARKER;
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[offset].order = order;
215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[offset].lowOffset = low;
216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[offset].highOffset = high;
217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bufferMax += 1;
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return &ceb[offset];
221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottconst CEI *Target::prevCE(int32_t offset)
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UErrorCode status = U_ZERO_ERROR;
226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t low = -1, high = -1;
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    uint32_t order;
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UBool cont = FALSE;
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (offset >= bufferMin && offset < bufferMax) {
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return &ceb[offset];
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (bufferMax >= bufferSize || offset != bufferMax) {
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return NULL;
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    do {
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        high  = ucol_getOffset(elements);
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        order = ucol_previous(elements, &status);
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        low   = ucol_getOffset(elements);
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (order == (uint32_t)UCOL_NULLORDER) {
244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            break;
245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        cont = isContinuation(order);
248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        order &= strengthMask;
249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) {
251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (strength >= UCOL_QUATERNARY) {
252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                order &= UCOL_PRIMARYORDERMASK;
253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            } else {
254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                order = UCOL_IGNORABLE;
255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } while (order == UCOL_IGNORABLE);
258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
259c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bufferMax += 1;
260c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
261c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (cont) {
262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        order |= UCOL_CONTINUATION_MARKER;
263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
265c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[offset].order       = order;
266c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[offset].lowOffset   = low;
267c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[offset].highOffset = high;
268c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
269c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return &ceb[offset];
270c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
271c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
272c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t Target::stringLength()
273c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
274c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (targetString != NULL) {
275c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return targetLength;
276c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
277c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
278c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return 0;
279c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
280c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
281c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottUChar Target::charAt(int32_t offset)
282c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
283c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (targetString != NULL) {
284c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return targetBuffer[offset];
285c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
286c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
287c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return 0x0000;
288c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
289c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
290c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid Target::setOffset(int32_t offset)
291c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
292c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UErrorCode status = U_ZERO_ERROR;
293c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
294c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bufferMin = 0;
295c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bufferMax = 0;
296c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
297c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ucol_setOffset(elements, offset, &status);
298c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
299c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
300c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid Target::setLast(int32_t last)
301c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
302c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UErrorCode status = U_ZERO_ERROR;
303c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
304c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bufferMin = 0;
305c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bufferMax = 1;
306c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
307c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[0].order      = UCOL_NULLORDER;
308c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[0].lowOffset  = last;
309c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ceb[0].highOffset = last;
310c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
311c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ucol_setOffset(elements, last, &status);
312c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
313c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
314c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t Target::getOffset()
315c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
316c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return ucol_getOffset(elements);
317c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
318c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
319c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottUBool Target::isBreakBoundary(int32_t offset)
320c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
321c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return ubrk_isBoundary(charBreakIterator, offset);
322c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
323c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
324c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t Target::nextBreakBoundary(int32_t offset)
325c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
326c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return ubrk_following(charBreakIterator, offset);
327c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
328c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
329c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t Target::nextSafeBoundary(int32_t offset)
330c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
331c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    while (offset < targetLength) {
332c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      //UChar ch = charAt(offset);
333c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        UChar ch = targetBuffer[offset];
334c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
335c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) {
336c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            return offset;
337c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
338c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
339c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        offset += 1;
340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return targetLength;
343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
344c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottUBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end)
346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (strength < UCOL_IDENTICAL) {
348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return TRUE;
349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
351c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UChar t2[32], p2[32];
352c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const UChar *pBuffer = pattern.getBuffer();
353c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t pLength = pattern.length();
354c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t length = end - start;
355c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
356c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
357c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
358c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t decomplength = unorm_decompose(t2, ARRAY_SIZE(t2),
359c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                       targetBuffer + start, length,
360c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                       FALSE, 0, &status);
361c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // use separate status2 in case of buffer overflow
363c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (decomplength != unorm_decompose(p2, ARRAY_SIZE(p2),
364c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                        pBuffer, pLength,
365c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                        FALSE, 0, &status2)) {
366c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return FALSE; // lengths are different
367c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
368c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
369c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // compare contents
370c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UChar *text, *pat;
371c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
372c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if(U_SUCCESS(status)) {
373c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        text = t2;
374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        pat = p2;
375c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else if(status == U_BUFFER_OVERFLOW_ERROR) {
376c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        status = U_ZERO_ERROR;
377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
378c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // allocate one buffer for both decompositions
379c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        text = NEW_ARRAY(UChar, decomplength * 2);
380c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
381c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Check for allocation failure.
382c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (text == NULL) {
383c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        	return FALSE;
384c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
385c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
386c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        pat = text + decomplength;
387c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
388c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        unorm_decompose(text, decomplength, targetBuffer + start,
389c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        length, FALSE, 0, &status);
390c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
391c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        unorm_decompose(pat, decomplength, pBuffer,
392c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        pLength, FALSE, 0, &status);
393c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else {
394c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // NFD failed, make sure that u_memcmp() does not overrun t2 & p2
395c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // and that we don't uprv_free() an undefined text pointer
396c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        text = pat = t2;
397c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        decomplength = 0;
398c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
399c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
400c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UBool result = (UBool)(u_memcmp(pat, text, decomplength) == 0);
401c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
402c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if(text != t2) {
403c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        DELETE_ARRAY(text);
404c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
405c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
406c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // return FALSE if NFD failed
407c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return U_SUCCESS(status) && result;
408c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
409c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
410c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define HASH_TABLE_SIZE 257
411c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
412c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass BadCharacterTable : public UMemory
413c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
414c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottpublic:
415c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status);
416c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ~BadCharacterTable();
417c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
418c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t operator[](uint32_t ce) const;
419c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t getMaxSkip() const;
420c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t minLengthInChars(int32_t index);
421c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
422c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottprivate:
423c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    static int32_t hash(uint32_t ce);
424c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
425c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t maxSkip;
426c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t badCharacterTable[HASH_TABLE_SIZE];
427c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
428c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t *minLengthCache;
429c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
430c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
431c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottBadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status)
432c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    : minLengthCache(NULL)
433c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
434c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t plen = patternCEs.size();
435c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
436c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // **** need a better way to deal with this ****
437c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (U_FAILURE(status) || plen == 0) {
438c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
439c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
440c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
441c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t *history = NEW_ARRAY(int32_t, plen);
442c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
443c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (history == NULL) {
444c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        status = U_MEMORY_ALLOCATION_ERROR;
445c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
446c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
447c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
448c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for (int32_t i = 0; i < plen; i += 1) {
449c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        history[i] = -1;
450c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
451c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
452c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    minLengthCache = NEW_ARRAY(int32_t, plen + 1);
453c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
454c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (minLengthCache == NULL) {
455c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        DELETE_ARRAY(history);
456c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        status = U_MEMORY_ALLOCATION_ERROR;
457c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
458c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
459c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
460c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history);
461c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
462c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) {
463c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        badCharacterTable[j] = maxSkip;
464c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
465c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
466c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for(int32_t p = 1; p < plen; p += 1) {
467c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history);
468c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
469c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Make sure this entry is not bigger than the previous one.
470c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Otherwise, we might skip too far in some cases.
471c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) {
472c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            minLengthCache[p] = minLengthCache[p - 1];
473c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
474c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
475c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
476c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    minLengthCache[plen] = 0;
477c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
478c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for(int32_t p = 0; p < plen - 1; p += 1) {
479c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1];
480c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
481c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
482c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DELETE_ARRAY(history);
483c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
484c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
485c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottBadCharacterTable::~BadCharacterTable()
486c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
487c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DELETE_ARRAY(minLengthCache);
488c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
489c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
490c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t BadCharacterTable::operator[](uint32_t ce) const
491c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
492c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return badCharacterTable[hash(ce)];
493c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
494c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
495c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t BadCharacterTable::getMaxSkip() const
496c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
497c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return maxSkip;
498c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
499c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
500c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t BadCharacterTable::minLengthInChars(int32_t index)
501c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
502c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return minLengthCache[index];
503c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
504c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
505c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t BadCharacterTable::hash(uint32_t ce)
506c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
507c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE;
508c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
509c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
510c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass GoodSuffixTable : public UMemory
511c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
512c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottpublic:
513c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status);
514c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    ~GoodSuffixTable();
515c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
516c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t operator[](int32_t offset) const;
517c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
518c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottprivate:
519c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t *goodSuffixTable;
520c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
521c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
522c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottGoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status)
523c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    : goodSuffixTable(NULL)
524c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
525c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t patlen = patternCEs.size();
526c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
527c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // **** need a better way to deal with this ****
528c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (U_FAILURE(status) || patlen <= 0) {
529c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
530c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
531c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
532c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t *suff  = NEW_ARRAY(int32_t, patlen);
533c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t start = patlen - 1, end = - 1;
534c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t maxSkip = badCharacterTable.getMaxSkip();
535c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
536c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (suff == NULL) {
537c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        status = U_MEMORY_ALLOCATION_ERROR;
538c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
539c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
540c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
541c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // initialze suff
542c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    suff[patlen - 1] = patlen;
543c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
544c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for (int32_t i = patlen - 2; i >= 0; i -= 1) {
545c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // (i > start) means we're inside the last suffix match we found
546c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // ((patlen - 1) - end) is how far the end of that match is from end of pattern
547c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // (i - start) is how far we are from start of that match
548c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // (i + (patlen - 1) - end) is index of same character at end of pattern
549c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // so if any suffix match at that character doesn't extend beyond the last match,
550c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // it's the suffix for this character as well
551c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (i > start && suff[i + patlen - 1 - end] < i - start) {
552c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            suff[i] = suff[i + patlen - 1 - end];
553c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        } else {
554c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            start = end = i;
555c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
556c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            int32_t s = patlen;
557c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
558c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            while (start >= 0 && patternCEs[start] == patternCEs[--s]) {
559c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                start -= 1;
560c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
561c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
562c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            suff[i] = end - start;
563c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
564c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
565c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
566c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // now build goodSuffixTable
567c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    goodSuffixTable  = NEW_ARRAY(int32_t, patlen);
568c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
569c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (goodSuffixTable == NULL) {
570c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        DELETE_ARRAY(suff);
571c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        status = U_MEMORY_ALLOCATION_ERROR;
572c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
573c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
574c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
575c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
576c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // initialize entries to minLengthInChars of the pattern
577c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for (int32_t i = 0; i < patlen; i += 1) {
578c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        goodSuffixTable[i] = maxSkip;
579c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
580c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
581c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t prefix = 0;
582c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
583c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) {
584c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (suff[i] == i + 1) {
585c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // this matching suffix is a prefix of the pattern
586c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1);
587c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
588c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // for any mis-match before this suffix, we should skip
589c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // so that the front of the pattern (i.e. the prefix)
590c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // lines up with the front of the suffix.
591c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // (patlen - 1 - i) is the start of the suffix
592c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            while (prefix < patlen - 1 - i) {
593c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                // value of maxSkip means never set...
594c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                if (goodSuffixTable[prefix] == maxSkip) {
595c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    goodSuffixTable[prefix] = prefixSkip;
596c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                }
597c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
598c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                prefix += 1;
599c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
600c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
601c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
602c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
603c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    for (int32_t i = 0; i < patlen - 1; i += 1) {
604c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1);
605c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
606c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
607c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DELETE_ARRAY(suff);
608c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
609c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
610c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottGoodSuffixTable::~GoodSuffixTable()
611c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
612c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DELETE_ARRAY(goodSuffixTable);
613c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
614c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
615c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottint32_t GoodSuffixTable::operator[](int32_t offset) const
616c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
617c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return goodSuffixTable[offset];
618c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
619c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
620c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottUOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch)
621c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
622c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
623c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottUBool BoyerMooreSearch::empty()
624c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
625c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return patCEs->size() <= 0;
626c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
627c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
628c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottCollData *BoyerMooreSearch::getData()
629c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
630c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return data;
631c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
632c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
633c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottCEList *BoyerMooreSearch::getPatternCEs()
634c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
635c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return patCEs;
636c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
637c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
638c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottBadCharacterTable *BoyerMooreSearch::getBadCharacterTable()
639c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
640c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return badCharacterTable;
641c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
642c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
643c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottGoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable()
644c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
645c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return goodSuffixTable;
646c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
647c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
648c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottBoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString,
649c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                   UErrorCode &status)
650c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL)
651c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
652c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
653c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (U_FAILURE(status)) {
654c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
655c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
656c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
657c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    UCollator *collator = data->getCollator();
658c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
659c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    patCEs = new CEList(collator, patternString, status);
660c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
661c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (patCEs == NULL || U_FAILURE(status)) {
662c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
663c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
664c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
665c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    badCharacterTable = new BadCharacterTable(*patCEs, data, status);
666c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
667c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (badCharacterTable == NULL || U_FAILURE(status)) {
668c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
669c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
670c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
671c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status);
672c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
673c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (targetString != NULL) {
674c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        target = new Target(collator, targetString, patCEs->size(), status);
675c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
676c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
677c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
678c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottBoyerMooreSearch::~BoyerMooreSearch()
679c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
680c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    delete target;
681c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    delete goodSuffixTable;
682c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    delete badCharacterTable;
683c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    delete patCEs;
684c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
685c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
686c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status)
687c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
688c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (U_FAILURE(status)) {
689c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return;
690c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
691c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
692c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (target == NULL) {
693c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        target = new Target(data->getCollator(), targetString, patCEs->size(), status);
694c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else {
695c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        target->setTargetString(targetString);
696c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
697c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
698c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
699c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. ****
700c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott/*
701c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott * TODO:
702c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *  * deal with trailing (and leading?) ignorables.
703c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott *  * Adding BoyerMooreSearch object slowed it down. How can we speed it up?
704c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott */
705c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottUBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end)
706c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott{
707c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    /*UCollator *coll =*/ data->getCollator();
708c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t plen = patCEs->size();
709c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t tlen = target->stringLength();
710c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t maxSkip = badCharacterTable->getMaxSkip();
711c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    int32_t tOffset = offset + maxSkip;
712c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
713c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (plen <= 0) {
714c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Searching for a zero length pattern always fails.
715c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        start = end = -1;
716c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return FALSE;
717c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
718c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
719c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    while (tOffset <= tlen) {
720c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        int32_t pIndex = plen - 1;
721c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        int32_t tIndex = 0;
722c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        int32_t lIndex = 0;
723c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
724c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (tOffset < tlen) {
725c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // **** we really want to skip ahead enough to  ****
726c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // **** be sure we get at least 1 non-ignorable ****
727c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // **** CE after the end of the pattern.        ****
728c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            int32_t next = target->nextSafeBoundary(tOffset + 1);
729c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
730c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            target->setOffset(next);
731c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
732c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            for (lIndex = 0; ; lIndex += 1) {
733c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                const CEI *cei = target->prevCE(lIndex);
734c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                int32_t low = cei->lowOffset;
735c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                int32_t high = cei->highOffset;
736c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
737c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                if (high == 0 || (low < high && low <= tOffset)) {
738c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    if (low < tOffset) {
739c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) {
740c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            lIndex -= 1;
741c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        }
742c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
743c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        if (high > tOffset) {
744c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                            tOffset = high;
745c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                        }
746c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    }
747c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
748c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    break;
749c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                }
750c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
751c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        } else {
752c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            target->setLast(tOffset);
753c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            lIndex = 0;
754c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
755c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
756c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        tIndex = ++lIndex;
757c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
758c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Iterate backward until we hit the beginning of the pattern
759c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        while (pIndex >= 0) {
760c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            uint32_t pce = (*patCEs)[pIndex];
761c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            const CEI *tcei = target->prevCE(tIndex++);
762c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
763c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
764c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (tcei->order != pce) {
765c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                // There is a mismatch at this position.  Decide how far
766c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                // over to shift the pattern, then try again.
767c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
768c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex];
769c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifdef EXTRA_CAUTIOUS
770c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                int32_t old = tOffset;
771c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif
772c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
773c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1);
774c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
775c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                if (gsOffset > tOffset) {
776c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    tOffset = gsOffset;
777c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                }
778c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
779c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifdef EXTRA_CAUTIOUS
780c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                // Make sure we don't skip backwards...
781c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                if (tOffset <= old) {
782c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    tOffset = old + 1;
783c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                }
784c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif
785c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
786c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                break;
787c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
788c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
789c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            pIndex -= 1;
790c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
791c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
792c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (pIndex < 0) {
793c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // We made it back to the beginning of the pattern,
794c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // which means we matched it all.  Return the location.
795c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            const CEI firstCEI = *target->prevCE(tIndex - 1);
796c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            const CEI lastCEI  = *target->prevCE(lIndex);
797c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            int32_t mStart   = firstCEI.lowOffset;
798c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            int32_t minLimit = lastCEI.lowOffset;
799c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            int32_t maxLimit = lastCEI.highOffset;
800c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            int32_t mLimit;
801c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            UBool found = TRUE;
802c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
803c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            target->setOffset(/*tOffset*/maxLimit);
804c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
805c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            const CEI nextCEI = *target->nextCE(0);
806c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
807c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (nextCEI.lowOffset > maxLimit) {
808c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                maxLimit = nextCEI.lowOffset;
809c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
810c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
811c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) {
812c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                found = FALSE;
813c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
814c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
815c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (! target->isBreakBoundary(mStart)) {
816c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                found = FALSE;
817c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
818c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
819c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (firstCEI.lowOffset == firstCEI.highOffset) {
820c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                found = FALSE;
821c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
822c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
823c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            mLimit = maxLimit;
824c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (minLimit < maxLimit) {
825c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                int32_t nbb = target->nextBreakBoundary(minLimit);
826c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
827c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                if (nbb >= lastCEI.highOffset) {
828c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                    mLimit = nbb;
829c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                }
830c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
831c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
832c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (mLimit > maxLimit) {
833c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                found = FALSE;
834c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
835c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
836c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (! target->isBreakBoundary(mLimit)) {
837c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                found = FALSE;
838c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
839c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
840c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (! target->isIdentical(pattern, mStart, mLimit)) {
841c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                found = FALSE;
842c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
843c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
844c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            if (found) {
845c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                start = mStart;
846c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                end   = mLimit;
847c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
848c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                return TRUE;
849c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            }
850c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
851c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip?
852c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
853c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Otherwise, we're here because of a mismatch, so keep going....
854c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
855c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
856c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // no match
857c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott   start = -1;
858c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott   end = -1;
859c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott   return FALSE;
860c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
861c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
862c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick ScottU_NAMESPACE_END
863c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
864c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif // #if !UCONFIG_NO_COLLATION
865