1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 2000-2015, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10
11/**
12 * Port From:   ICU4C v2.1 : collate/StringSearchTest
13 * Source File: $ICU4CRoot/source/test/intltest/srchtest.cpp
14 **/
15
16package android.icu.dev.test.search;
17
18import static android.icu.text.Collator.IDENTICAL;
19import static android.icu.text.Collator.PRIMARY;
20import static android.icu.text.Collator.QUATERNARY;
21import static android.icu.text.Collator.SECONDARY;
22import static android.icu.text.Collator.TERTIARY;
23import static android.icu.text.SearchIterator.ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD;
24import static android.icu.text.SearchIterator.ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD;
25import static android.icu.text.SearchIterator.ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
26
27import java.text.CharacterIterator;
28import java.text.StringCharacterIterator;
29import java.util.Locale;
30
31import org.junit.Before;
32import org.junit.Test;
33import org.junit.runner.RunWith;
34import org.junit.runners.JUnit4;
35
36import android.icu.dev.test.TestFmwk;
37import android.icu.text.BreakIterator;
38import android.icu.text.Collator;
39import android.icu.text.RuleBasedCollator;
40import android.icu.text.SearchIterator;
41import android.icu.text.SearchIterator.ElementComparisonType;
42import android.icu.text.StringSearch;
43import android.icu.util.ULocale;
44import android.icu.testsharding.MainTestShard;
45
46@MainTestShard
47@RunWith(JUnit4.class)
48public class SearchTest extends TestFmwk {
49
50    //inner class
51    static class SearchData {
52        SearchData(String text, String pattern,
53                    String coll, int strength, ElementComparisonType cmpType, String breaker,
54                    int[] offset, int[] size) {
55            this.text = text;
56            this.pattern = pattern;
57            this.collator = coll;
58            this.strength = strength;
59            this.cmpType = cmpType;
60            this.breaker = breaker;
61            this.offset = offset;
62            this.size = size;
63        }
64        String              text;
65        String              pattern;
66        String              collator;
67        int                 strength;
68        ElementComparisonType   cmpType;
69        String              breaker;
70        int[]               offset;
71        int[]               size;
72    }
73
74    RuleBasedCollator m_en_us_;
75    RuleBasedCollator m_fr_fr_;
76    RuleBasedCollator m_de_;
77    RuleBasedCollator m_es_;
78    BreakIterator     m_en_wordbreaker_;
79    BreakIterator     m_en_characterbreaker_;
80
81    // Just calling SearchData constructor, to make the test data source code
82    // nice and short
83    private static SearchData SD(String text, String pattern, String coll, int strength,
84                    ElementComparisonType cmpType, String breaker, int[] offset, int[] size) {
85        return new SearchData(text, pattern, coll, strength, cmpType, breaker, offset, size);
86    }
87
88    // Just returning int[], to make the test data nice and short
89    private static int[] IA(int... elements) {
90        return elements;
91    }
92
93    static SearchData[] BASIC = {
94        SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
95        SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)),
96        SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)),
97        SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)),
98        SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)),
99        SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
100        SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)),
101        SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
102        SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
103        SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
104        SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
105        SD("\u00c9", "e", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
106    };
107
108    SearchData BREAKITERATOREXACT[] = {
109        SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)),
110        SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)),
111        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)),
112        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)),
113        SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)),
114        /* jitterbug 1745 */
115        SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)),
116        SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)),
117        SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)),
118    };
119
120    SearchData BREAKITERATORCANONICAL[] = {
121        SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)),
122        SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)),
123        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)),
124        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)),
125        SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)),
126        /* jitterbug 1745 */
127        SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)),
128        SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)),
129        SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)),
130    };
131
132    SearchData BASICCANONICAL[] = {
133        SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
134        SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)),
135        SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)),
136        SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)),
137        SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)),
138        SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
139        SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)),
140
141        SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
142        SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
143        SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
144        SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
145        SD("a\u0300\u0325b", "\u0300b", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
146        SD("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
147        SD("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
148        SD("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
149
150        SD("\u00c4\u0323", "A\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
151        SD("\u0308\u0323", "\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
152    };
153
154    SearchData COLLATOR[] = {
155        /* english */
156        SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
157        /* tailored */
158        SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)),
159    };
160
161    String TESTCOLLATORRULE = "& o,O ; p,P";
162    String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; \u00fc & UE ; \u00dc";
163
164    SearchData COLLATORCANONICAL[] = {
165        /* english */
166        SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
167        /* tailored */
168        SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)),
169    };
170
171    SearchData COMPOSITEBOUNDARIES[] = {
172        SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
173        SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
174        SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
175        SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
176        SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
177        SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
178
179        /* first one matches only because it's at the start of the text */
180        SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
181
182        /* \\u0300 blocked by \\u0300 */
183        SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
184
185        /* A + 030A + 0301 */
186        SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
187        SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
188
189        SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
190        SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
191
192        SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
193
194        SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
195
196        /* blocked accent */
197        SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
198        SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
199
200        SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
201        SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
202        SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
203
204        SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
205
206        SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
207        SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
208
209        SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
210
211        SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
212        SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
213        SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)),
214    };
215
216    SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
217        SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
218        SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
219        SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
220        SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
221        SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
222        SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
223
224        /* first one matches only because it's at the start of the text */
225        SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
226
227        /* \u0300 blocked by \u0300 */
228        SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
229
230        /* A + 030A + 0301 */
231        SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
232        SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
233
234        SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
235        SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
236
237        SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
238
239        SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
240
241        /* blocked accent */
242        SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
243        SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
244
245        SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
246        SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
247        SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
248
249        SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
250
251        SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
252        SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
253
254        SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
255
256        SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
257        SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
258
259        SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)),
260    };
261
262    SearchData SUPPLEMENTARY[] = {
263        SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
264                "\uD800\uDC00", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)),
265        SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)),
266        SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
267        SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
268        SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
269        SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
270    };
271
272    String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315";
273
274    SearchData CONTRACTION[] = {
275        /* common discontiguous */
276        SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
277
278        SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
279
280        /* contraction prefix */
281        SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
282
283        SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
284        SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
285
286        /*
287         * discontiguous problem here for backwards iteration. accents not found because discontiguous stores all
288         * information
289         */
290        SD("X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
291        /* ends not with a contraction character */
292        SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
293        SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
294        SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
295        /* blocked discontiguous */
296        SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
297
298        /*
299         * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the
300         * match fails because it ends in the middle of an expansion...
301         */
302        SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
303    };
304
305    SearchData CONTRACTIONCANONICAL[] = {
306        /* common discontiguous */
307        SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
308        SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
309
310        /* contraction prefix */
311        SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
312
313        SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
314        SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
315
316        /*
317         * discontiguous problem here for backwards iteration. forwards gives 0, 4 but backwards give 1, 3
318         */
319        /*
320         * {"X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, {0, -1), {4}),
321         */
322
323        /* ends not with a contraction character */
324        SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
325        SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
326
327        SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
328
329        /* blocked discontiguous */
330        SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
331
332        /*
333         * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the
334         * match fails because it ends in the middle of an expansion...
335         */
336        SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(2)),
337    };
338
339    SearchData MATCH[] = {
340        SD("a busy bee is a very busy beeee", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, -1), IA(3, 3)),
341        /*  012345678901234567890123456789012345678901234567890 */
342        SD("a busy bee is a very busy beeee with no bee life", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, 40, -1), IA(3, 3, 3)),
343    };
344
345    String IGNORABLERULE = "&a = \u0300";
346
347    SearchData IGNORABLE[] = {
348        /*
349         * This isn't much of a test when matches have to be on grapheme boundiaries. The match at 0 only works because it's
350         * at the start of the text.
351         */
352        SD("\u0300\u0315 \u0300\u0315 ", "\u0300", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
353    };
354
355    SearchData DIACTRICMATCH[] = {
356        SD("\u0061\u0061\u00E1", "\u0061\u00E1", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(2)),
357        SD("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020", "\u00C2\u0303",
358            null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 4, 5, 6, 7, 10, 12, 13, 16, -1), IA(2, 1, 1, 1, 3, 2, 1, 3, 2)),
359        SD("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 5, -1), IA(4, 3)),
360    };
361
362    SearchData NORMCANONICAL[] = {
363        SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
364        SD("\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
365        SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
366        SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
367        SD("a\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
368        SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
369    };
370
371    SearchData NORMEXACT[] = {
372        SD("a\u0300\u0325", "a\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
373    };
374
375    SearchData NONNORMEXACT[] = {
376        SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
377    };
378
379    SearchData OVERLAP[] = {
380        SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)),
381    };
382
383    SearchData NONOVERLAP[] = {
384        SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)),
385    };
386
387    SearchData OVERLAPCANONICAL[] = {
388        SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)),
389    };
390
391    SearchData NONOVERLAPCANONICAL[] = {
392        SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)),
393    };
394
395    SearchData PATTERNCANONICAL[] = {
396        SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)),
397        SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
398    };
399
400    SearchData PATTERN[] = {
401        SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)),
402        SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
403    };
404
405    String PECHE_WITH_ACCENTS = "un p\u00E9ch\u00E9, "
406                                + "\u00E7a p\u00E8che par, "
407                                + "p\u00E9cher, "
408                                + "une p\u00EAche, "
409                                + "un p\u00EAcher, "
410                                + "j\u2019ai p\u00EAch\u00E9, "
411                                + "un p\u00E9cheur, "
412                                + "\u201Cp\u00E9che\u201D, "
413                                + "decomp peche\u0301, "
414                                + "base peche";
415    // in the above, the interesting words and their offsets are:
416    //    3 pe<301>che<301>
417    //    13 pe<300>che
418    //    24 pe<301>cher
419    //    36 pe<302>che
420    //    46 pe<302>cher
421    //    59 pe<302>che<301>
422    //    69 pe<301>cheur
423    //    79 pe<301>che
424    //    94 peche<+301>
425    //    107 peche
426
427    SearchData STRENGTH[] = {
428        /*  012345678901234567890123456789012345678901234567890123456789 */
429        SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
430        SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)),
431        SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
432                "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)),
433        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)),
434        SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)),
435        SD("\u00c0 should match but not A", "A\u0300", "en", IDENTICAL, STANDARD_ELEMENT_COMPARISON,  null, IA(0, -1), IA(1, 0)),
436
437        /* some tests for modified element comparison, ticket #7093 */
438        SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
439        SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
440        SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)),
441        SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
442        SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
443        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)),
444        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)),
445        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)),
446        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)),
447        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)),
448        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)),
449        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
450        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
451        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
452        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
453        SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
454        SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
455        SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
456        SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
457
458        /* more tests for modified element comparison (with fr), ticket #7093 */
459        SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
460        SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
461        SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)),
462        SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
463        SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
464        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)),
465        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)),
466        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)),
467        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)),
468        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)),
469        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)),
470        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
471        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
472        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
473        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
474        SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
475        SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
476        SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
477        SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
478
479    };
480
    /**
     * Expectation table for searches at PRIMARY strength across the "en", "fr", "de" and "es"
     * collator keys. Each SD(...) entry lists, in order: text, pattern, collator key, strength,
     * element comparison type, break iterator key, expected match offsets and expected match
     * lengths. Every offset list in this table ends with -1.
     * NOTE(review): the name suggests this table feeds a canonical-match test; the consumer is
     * outside this view — confirm.
     */
    SearchData STRENGTHCANONICAL[] = {
        /* column ruler (last digit of each index) for reading offsets in the text below */
        /*  012345678901234567890123456789012345678901234567890123456789 */
        SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
        /* same text with the word breaker: the "fox" inside "foxes" is no longer expected */
        SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)),
        /* unaccented pattern is expected to match the accented forms at PRIMARY strength */
        SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
                "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)),
        /* second expected match has length 2: "toe" against the two characters at offset 14 */
        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)),
        /* case differences are expected to be ignored at PRIMARY strength */
        SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)),
    };
490
    /**
     * Expectation table for patterns containing supplementary characters, written as surrogate
     * pairs. Expected offsets and lengths count UTF-16 code units, so a lone surrogate pair has
     * length 2. All entries use the default collator key (null) at TERTIARY strength.
     * NOTE(review): the name suggests this table feeds a canonical-match test; the consumer is
     * outside this view — confirm.
     */
    SearchData SUPPLEMENTARYCANONICAL[] = {
        /* column ruler (last digit of each index) for reading offsets in the text below */
        /*  012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
        /*
         * The text also contains other surrogate pairs and unpaired surrogates next to the
         * target pair (an extra high surrogate before offset 26, an extra low surrogate after
         * offset 29); the pair itself is still expected to be found at 26 and 29.
         */
        SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", "\uD800\uDC00",
            null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)),
        /* supplementary character directly adjacent to letters */
        SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)),
        /* supplementary character wrapped in spaces or punctuation; the wrapper is part of the pattern */
        SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    };
501
    /**
     * Expectation table for texts and patterns made of, or interrupted by, spaces and hyphens.
     * Each SD(...) entry lists text, pattern, collator key, strength, element comparison type,
     * break iterator key, expected match offsets (ending with -1) and expected match lengths.
     * NOTE(review): these expectations presumably rely on the collator treating spaces and
     * punctuation as ignorable; that configuration happens outside this view — confirm.
     */
    static SearchData VARIABLE[] = {
        /* column ruler (last digit of each index) for reading offsets in the text below */
        /*  012345678901234567890123456789012345678901234567890123456789 */
        /* "black-bird" at offset 38 is expected to match with length 10, hyphen included */
        SD("blackbirds black blackbirds blackbird black-bird", "blackbird", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 17, 28, 38, -1), IA(9, 9, 9, 10)),

        /*
         * to see that it doesn't go into an infinite loop if the start of text is an ignorable character
         */
        SD(" on", "go", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        /* a pattern of three spaces: a zero-length match is expected at every text offset */
        SD("abcdefghijklmnopqrstuvwxyz", "   ",
            null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null,
            IA(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1),
            IA(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),

        /* testing tightest match */
        /* at QUATERNARY only the contiguous "abc" is expected */
        SD(" abc  a bc   ab c    a  bc     ab  c", "abc", null, QUATERNARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(3)),
        /*  012345678901234567890123456789012345678901234567890123456789 */
        /* at SECONDARY the space-separated variants are expected too; lengths include the inner spaces */
        SD(" abc  a bc   ab c    a  bc     ab  c", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 6, 13, 21, 31, -1), IA(3, 4, 4, 5, 5)),

        /* totally ignorable text */
        SD("           ---------------", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    };
523
    /**
     * Two texts searched for the same pattern "fox" with the default collator key at TERTIARY
     * strength: the first expects matches at offsets 4 and 15, the second a single match at 16.
     * NOTE(review): presumably used to check re-targeting a search at a different text; the
     * consumer is outside this view — confirm.
     */
    static SearchData TEXTCANONICAL[] = {
        SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)),
        SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)),
    };
528
    /**
     * Expectation table for prefix matches in Devanagari text (code points in the U+0900 block),
     * searched with the default collator key at PRIMARY strength. The texts are space-separated
     * sequences of a consonant (or consonant cluster) followed by different signs; the expected
     * lengths (1 or 2 in the first entry) record how much of each sequence the match covers.
     */
    static SearchData INDICPREFIXMATCH[] = {
        /* single-letter pattern against the letter followed by various signs */
        SD("\u0915\u0020\u0915\u0901\u0020\u0915\u0902\u0020\u0915\u0903\u0020\u0915\u0940\u0020\u0915\u093F\u0020\u0915\u0943\u0020\u0915\u093C\u0020\u0958",
                "\u0915", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 5, 8, 11, 14, 17, 20, 23,-1), IA(1, 2, 2, 2, 1, 1, 1, 2, 1)),
        /* two-letter pattern: only the first four sequences are expected to match */
        SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947",
                "\u0915\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 3, 7, 11, -1), IA(2, 2, 2, 2)),
        /* same text, three-code-point pattern: only the last two sequences are expected to match */
        SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947",
                "\u0915\u0943\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 19, -1), IA(3, 3)),
    };
537
538    /**
539     * Constructor
540     */
541    public SearchTest()
542    {
543
544    }
545
546    @Before
547    public void init() throws Exception {
548        m_en_us_ = (RuleBasedCollator)Collator.getInstance(Locale.US);
549        m_fr_fr_ = (RuleBasedCollator)Collator.getInstance(Locale.FRANCE);
550        m_de_ = (RuleBasedCollator)Collator.getInstance(new Locale("de", "DE"));
551        m_es_ = (RuleBasedCollator)Collator.getInstance(new Locale("es", "ES"));
552        m_en_wordbreaker_ = BreakIterator.getWordInstance();
553        m_en_characterbreaker_ = BreakIterator.getCharacterInstance();
554        String rules = m_de_.getRules() + EXTRACOLLATIONRULE;
555        m_de_ = new RuleBasedCollator(rules);
556        rules = m_es_.getRules() + EXTRACOLLATIONRULE;
557        m_es_ = new RuleBasedCollator(rules);
558
559    }
560
561    RuleBasedCollator getCollator(String collator) {
562        if (collator == null) {
563            return m_en_us_;
564        } if (collator.equals("fr")) {
565            return m_fr_fr_;
566        } else if (collator.equals("de")) {
567            return m_de_;
568        } else if (collator.equals("es")) {
569            return m_es_;
570        } else {
571            return m_en_us_;
572        }
573    }
574
575    BreakIterator getBreakIterator(String breaker) {
576        if (breaker == null) {
577            return null;
578        } if (breaker.equals("wordbreaker")) {
579            return m_en_wordbreaker_;
580        } else {
581            return m_en_characterbreaker_;
582        }
583    }
584
585    boolean assertCanonicalEqual(SearchData search) {
586        Collator      collator = getCollator(search.collator);
587        BreakIterator breaker  = getBreakIterator(search.breaker);
588        StringSearch  strsrch;
589
590        String text = search.text;
591        String  pattern = search.pattern;
592
593        if (breaker != null) {
594            breaker.setText(text);
595        }
596        collator.setStrength(search.strength);
597        collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
598        try {
599            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
600            strsrch.setElementComparisonType(search.cmpType);
601            strsrch.setCanonical(true);
602        } catch (Exception e) {
603            errln("Error opening string search" + e.getMessage());
604            return false;
605        }
606
607        if (!assertEqualWithStringSearch(strsrch, search)) {
608            collator.setStrength(TERTIARY);
609            collator.setDecomposition(Collator.NO_DECOMPOSITION);
610            return false;
611        }
612        collator.setStrength(TERTIARY);
613        collator.setDecomposition(Collator.NO_DECOMPOSITION);
614        return true;
615    }
616
617    boolean assertEqual(SearchData search) {
618        Collator      collator = getCollator(search.collator);
619        BreakIterator breaker  = getBreakIterator(search.breaker);
620        StringSearch  strsrch;
621
622        String text = search.text;
623        String  pattern = search.pattern;
624
625        if (breaker != null) {
626            breaker.setText(text);
627        }
628        collator.setStrength(search.strength);
629        try {
630            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
631            strsrch.setElementComparisonType(search.cmpType);
632        } catch (Exception e) {
633            errln("Error opening string search " + e.getMessage());
634            return false;
635        }
636
637        if (!assertEqualWithStringSearch(strsrch, search)) {
638            collator.setStrength(TERTIARY);
639            return false;
640        }
641        collator.setStrength(TERTIARY);
642        return true;
643    }
644
645    boolean assertEqualWithAttribute(SearchData search, boolean canonical, boolean overlap) {
646        Collator      collator = getCollator(search.collator);
647        BreakIterator breaker  = getBreakIterator(search.breaker);
648        StringSearch  strsrch;
649
650        String text = search.text;
651        String  pattern = search.pattern;
652
653        if (breaker != null) {
654            breaker.setText(text);
655        }
656        collator.setStrength(search.strength);
657        try {
658            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
659            strsrch.setCanonical(canonical);
660            strsrch.setOverlapping(overlap);
661            strsrch.setElementComparisonType(search.cmpType);
662        } catch (Exception e) {
663            errln("Error opening string search " + e.getMessage());
664            return false;
665        }
666
667        if (!assertEqualWithStringSearch(strsrch, search)) {
668            collator.setStrength(TERTIARY);
669            return false;
670        }
671        collator.setStrength(TERTIARY);
672        return true;
673    }
674
675    boolean assertEqualWithStringSearch(StringSearch strsrch, SearchData search) {
676        int           count       = 0;
677        int   matchindex  = search.offset[count];
678        String matchtext;
679
680        if (strsrch.getMatchStart() != SearchIterator.DONE ||
681            strsrch.getMatchLength() != 0) {
682            errln("Error with the initialization of match start and length");
683        }
684        // start of following matches
685        while (matchindex >= 0) {
686            int matchlength = search.size[count];
687            strsrch.next();
688            //int x = strsrch.getMatchStart();
689            if (matchindex != strsrch.getMatchStart() ||
690                matchlength != strsrch.getMatchLength()) {
691                errln("Text: " + search.text);
692                errln("Searching forward for pattern: " + strsrch.getPattern());
693                errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
694                return false;
695            }
696            count ++;
697
698            matchtext = strsrch.getMatchedText();
699            String targetText = search.text;
700            if (matchlength > 0 &&
701                targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) {
702                errln("Error getting following matched text");
703            }
704
705            matchindex = search.offset[count];
706        }
707        strsrch.next();
708        if (strsrch.getMatchStart() != SearchIterator.DONE ||
709            strsrch.getMatchLength() != 0) {
710                errln("Text: " + search.text);
711                errln("Searching forward for pattern: " + strsrch.getPattern());
712                errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
713                return false;
714        }
715        // start of preceding matches
716        count = count == 0 ? 0 : count - 1;
717        matchindex = search.offset[count];
718        while (matchindex >= 0) {
719            int matchlength = search.size[count];
720            strsrch.previous();
721            if (matchindex != strsrch.getMatchStart() ||
722                matchlength != strsrch.getMatchLength()) {
723                errln("Text: " + search.text);
724                errln("Searching backward for pattern: " + strsrch.getPattern());
725                errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
726                return false;
727            }
728
729            matchtext = strsrch.getMatchedText();
730            String targetText = search.text;
731            if (matchlength > 0 &&
732                targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) {
733                errln("Error getting following matched text");
734            }
735
736            matchindex = count > 0 ? search.offset[count - 1] : -1;
737            count --;
738        }
739        strsrch.previous();
740        if (strsrch.getMatchStart() != SearchIterator.DONE ||
741            strsrch.getMatchLength() != 0) {
742                errln("Text: " + search.text);
743                errln("Searching backward for pattern: " + strsrch.getPattern());
744                errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
745                return false;
746        }
747        return true;
748    }
749
750    @Test
751    public void TestConstructor()
752    {
753        String pattern = "pattern";
754        String text = "text";
755        StringCharacterIterator textiter = new StringCharacterIterator(text);
756        Collator defaultcollator = Collator.getInstance();
757        BreakIterator breaker = BreakIterator.getCharacterInstance();
758        breaker.setText(text);
759        StringSearch search = new StringSearch(pattern, text);
760        if (!search.getPattern().equals(pattern)
761            || !search.getTarget().equals(textiter)
762            || !search.getCollator().equals(defaultcollator)
763            /*|| !search.getBreakIterator().equals(breaker)*/) {
764            errln("StringSearch(String, String) error");
765        }
766        search = new StringSearch(pattern, textiter, m_fr_fr_);
767        if (!search.getPattern().equals(pattern)
768            || !search.getTarget().equals(textiter)
769            || !search.getCollator().equals(m_fr_fr_)
770            /*|| !search.getBreakIterator().equals(breaker)*/) {
771            errln("StringSearch(String, StringCharacterIterator, "
772                  + "RuleBasedCollator) error");
773        }
774        Locale de = new Locale("de", "DE");
775        breaker = BreakIterator.getCharacterInstance(de);
776        breaker.setText(text);
777        search = new StringSearch(pattern, textiter, de);
778        if (!search.getPattern().equals(pattern)
779            || !search.getTarget().equals(textiter)
780            || !search.getCollator().equals(Collator.getInstance(de))
781            /*|| !search.getBreakIterator().equals(breaker)*/) {
782            errln("StringSearch(String, StringCharacterIterator, Locale) "
783                  + "error");
784        }
785
786        search = new StringSearch(pattern, textiter, m_fr_fr_,
787                                  m_en_wordbreaker_);
788        if (!search.getPattern().equals(pattern)
789            || !search.getTarget().equals(textiter)
790            || !search.getCollator().equals(m_fr_fr_)
791            || !search.getBreakIterator().equals(m_en_wordbreaker_)) {
792            errln("StringSearch(String, StringCharacterIterator, Locale) "
793                  + "error");
794        }
795    }
796
797    @Test
798    public void TestBasic() {
799        for (int count = 0; count < BASIC.length; count++) {
800            if (!assertEqual(BASIC[count])) {
801                errln("Error at test number " + count);
802            }
803        }
804    }
805
806    @Test
807    public void TestBreakIterator() {
808
809        String text = BREAKITERATOREXACT[0].text;
810        String pattern = BREAKITERATOREXACT[0].pattern;
811        StringSearch strsrch = null;
812        try {
813            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
814        } catch (Exception e) {
815            errln("Error opening string search");
816            return;
817        }
818
819        strsrch.setBreakIterator(null);
820        if (strsrch.getBreakIterator() != null) {
821            errln("Error usearch_getBreakIterator returned wrong object");
822        }
823
824        strsrch.setBreakIterator(m_en_characterbreaker_);
825        if (!strsrch.getBreakIterator().equals(m_en_characterbreaker_)) {
826            errln("Error usearch_getBreakIterator returned wrong object");
827        }
828
829        strsrch.setBreakIterator(m_en_wordbreaker_);
830        if (!strsrch.getBreakIterator().equals(m_en_wordbreaker_)) {
831            errln("Error usearch_getBreakIterator returned wrong object");
832        }
833
834        int count = 0;
835        while (count < 4) {
836            // special purposes for tests numbers 0-3
837            SearchData        search   = BREAKITERATOREXACT[count];
838            RuleBasedCollator collator = getCollator(search.collator);
839            BreakIterator     breaker  = getBreakIterator(search.breaker);
840                  //StringSearch      strsrch;
841
842            text = search.text;
843            pattern = search.pattern;
844            if (breaker != null) {
845                breaker.setText(text);
846            }
847            collator.setStrength(search.strength);
848            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker);
849            if (strsrch.getBreakIterator() != breaker) {
850                errln("Error setting break iterator");
851            }
852            if (!assertEqualWithStringSearch(strsrch, search)) {
853                collator.setStrength(TERTIARY);
854            }
855            search   = BREAKITERATOREXACT[count + 1];
856            breaker  = getBreakIterator(search.breaker);
857            if (breaker != null) {
858                breaker.setText(text);
859            }
860            strsrch.setBreakIterator(breaker);
861            if (strsrch.getBreakIterator() != breaker) {
862                errln("Error setting break iterator");
863            }
864            strsrch.reset();
865            if (!assertEqualWithStringSearch(strsrch, search)) {
866                 errln("Error at test number " + count);
867            }
868            count += 2;
869        }
870        for (count = 0; count < BREAKITERATOREXACT.length; count++) {
871            if (!assertEqual(BREAKITERATOREXACT[count])) {
872                errln("Error at test number " + count);
873            }
874        }
875    }
876
877    @Test
878    public void TestBreakIteratorCanonical() {
879        int        count  = 0;
880        while (count < 4) {
881            // special purposes for tests numbers 0-3
882            SearchData     search   = BREAKITERATORCANONICAL[count];
883
884            String text = search.text;
885            String pattern = search.pattern;
886            RuleBasedCollator collator = getCollator(search.collator);
887            collator.setStrength(search.strength);
888
889            BreakIterator breaker = getBreakIterator(search.breaker);
890            StringSearch  strsrch = null;
891            try {
892                strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker);
893            } catch (Exception e) {
894                errln("Error creating string search data");
895                return;
896            }
897            strsrch.setCanonical(true);
898            if (!strsrch.getBreakIterator().equals(breaker)) {
899                errln("Error setting break iterator");
900                return;
901            }
902            if (!assertEqualWithStringSearch(strsrch, search)) {
903                collator.setStrength(TERTIARY);
904                return;
905            }
906            search  = BREAKITERATOREXACT[count + 1];
907            breaker = getBreakIterator(search.breaker);
908            breaker.setText(strsrch.getTarget());
909            strsrch.setBreakIterator(breaker);
910            if (!strsrch.getBreakIterator().equals(breaker)) {
911                errln("Error setting break iterator");
912                return;
913            }
914            strsrch.reset();
915            strsrch.setCanonical(true);
916            if (!assertEqualWithStringSearch(strsrch, search)) {
917                 errln("Error at test number " + count);
918                 return;
919            }
920            count += 2;
921        }
922
923        for (count = 0; count < BREAKITERATORCANONICAL.length; count++) {
924             if (!assertEqual(BREAKITERATORCANONICAL[count])) {
925                 errln("Error at test number " + count);
926                 return;
927             }
928        }
929    }
930
931    @Test
932    public void TestCanonical() {
933        for (int count = 0; count < BASICCANONICAL.length; count++) {
934            if (!assertCanonicalEqual(BASICCANONICAL[count])) {
935                errln("Error at test number " + count);
936            }
937        }
938    }
939
940    @Test
941    public void TestCollator() {
942        // test collator that thinks "o" and "p" are the same thing
943        String text = COLLATOR[0].text;
944        String pattern  = COLLATOR[0].pattern;
945        StringSearch strsrch = null;
946        try {
947            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
948        } catch (Exception e) {
949            errln("Error opening string search ");
950            return;
951        }
952        if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
953            return;
954        }
955        String rules = TESTCOLLATORRULE;
956        RuleBasedCollator tailored = null;
957        try {
958            tailored = new RuleBasedCollator(rules);
959            tailored.setStrength(COLLATOR[1].strength);
960        } catch (Exception e) {
961            errln("Error opening rule based collator ");
962            return;
963        }
964
965        strsrch.setCollator(tailored);
966        if (!strsrch.getCollator().equals(tailored)) {
967            errln("Error setting rule based collator");
968        }
969        strsrch.reset();
970        if (!assertEqualWithStringSearch(strsrch, COLLATOR[1])) {
971            return;
972        }
973        strsrch.setCollator(m_en_us_);
974        strsrch.reset();
975        if (!strsrch.getCollator().equals(m_en_us_)) {
976            errln("Error setting rule based collator");
977        }
978        if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
979           errln("Error searching collator test");
980        }
981    }
982
983    @Test
984    public void TestCollatorCanonical() {
985        /* test collator that thinks "o" and "p" are the same thing */
986        String text = COLLATORCANONICAL[0].text;
987        String pattern = COLLATORCANONICAL[0].pattern;
988
989        StringSearch strsrch = null;
990        try {
991            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
992            strsrch.setCanonical(true);
993        } catch (Exception e) {
994            errln("Error opening string search ");
995        }
996
997        if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
998            return;
999        }
1000
1001        String rules = TESTCOLLATORRULE;
1002        RuleBasedCollator tailored = null;
1003        try {
1004            tailored = new RuleBasedCollator(rules);
1005            tailored.setStrength(COLLATORCANONICAL[1].strength);
1006            tailored.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1007        } catch (Exception e) {
1008            errln("Error opening rule based collator ");
1009        }
1010
1011        strsrch.setCollator(tailored);
1012        if (!strsrch.getCollator().equals(tailored)) {
1013            errln("Error setting rule based collator");
1014        }
1015        strsrch.reset();
1016        strsrch.setCanonical(true);
1017        if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[1])) {
1018            logln("COLLATORCANONICAL[1] failed");  // Error should already be reported.
1019        }
1020        strsrch.setCollator(m_en_us_);
1021        strsrch.reset();
1022        if (!strsrch.getCollator().equals(m_en_us_)) {
1023            errln("Error setting rule based collator");
1024        }
1025        if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
1026            logln("COLLATORCANONICAL[0] failed");  // Error should already be reported.
1027        }
1028    }
1029
1030    @Test
1031    public void TestCompositeBoundaries() {
1032        for (int count = 0; count < COMPOSITEBOUNDARIES.length; count++) {
1033            // logln("composite " + count);
1034            if (!assertEqual(COMPOSITEBOUNDARIES[count])) {
1035                errln("Error at test number " + count);
1036            }
1037        }
1038    }
1039
1040    @Test
1041    public void TestCompositeBoundariesCanonical() {
1042        for (int count = 0; count < COMPOSITEBOUNDARIESCANONICAL.length; count++) {
1043            // logln("composite " + count);
1044            if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) {
1045                errln("Error at test number " + count);
1046            }
1047        }
1048    }
1049
1050    @Test
1051    public void TestContraction() {
1052        String rules = CONTRACTIONRULE;
1053        RuleBasedCollator collator = null;
1054        try {
1055            collator = new RuleBasedCollator(rules);
1056            collator.setStrength(TERTIARY);
1057            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1058        } catch (Exception e) {
1059            errln("Error opening collator ");
1060        }
1061        String text = "text";
1062        String pattern = "pattern";
1063        StringSearch strsrch = null;
1064        try {
1065            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1066        } catch (Exception e) {
1067            errln("Error opening string search ");
1068        }
1069
1070        for (int count = 0; count< CONTRACTION.length; count++) {
1071            text = CONTRACTION[count].text;
1072            pattern = CONTRACTION[count].pattern;
1073            strsrch.setTarget(new StringCharacterIterator(text));
1074            strsrch.setPattern(pattern);
1075            if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) {
1076                errln("Error at test number " + count);
1077            }
1078        }
1079    }
1080
1081    @Test
1082    public void TestContractionCanonical() {
1083        String rules = CONTRACTIONRULE;
1084        RuleBasedCollator collator = null;
1085        try {
1086            collator = new RuleBasedCollator(rules);
1087            collator.setStrength(TERTIARY);
1088            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1089        } catch (Exception e) {
1090            errln("Error opening collator ");
1091        }
1092        String text = "text";
1093        String pattern = "pattern";
1094        StringSearch strsrch = null;
1095        try {
1096            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1097            strsrch.setCanonical(true);
1098        } catch (Exception e) {
1099            errln("Error opening string search");
1100        }
1101
1102        for (int count = 0; count < CONTRACTIONCANONICAL.length; count++) {
1103            text = CONTRACTIONCANONICAL[count].text;
1104            pattern = CONTRACTIONCANONICAL[count].pattern;
1105            strsrch.setTarget(new StringCharacterIterator(text));
1106            strsrch.setPattern(pattern);
1107            if (!assertEqualWithStringSearch(strsrch, CONTRACTIONCANONICAL[count])) {
1108                errln("Error at test number " + count);
1109            }
1110        }
1111    }
1112
1113    @Test
1114    public void TestGetMatch() {
1115        SearchData search = MATCH[0];
1116        String text = search.text;
1117        String pattern = search.pattern;
1118
1119        StringSearch strsrch = null;
1120        try {
1121            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1122        } catch (Exception e) {
1123            errln("Error opening string search ");
1124            return;
1125        }
1126
1127        int           count      = 0;
1128        int   matchindex = search.offset[count];
1129        String matchtext;
1130        while (matchindex >= 0) {
1131            int matchlength = search.size[count];
1132            strsrch.next();
1133            if (matchindex != strsrch.getMatchStart() ||
1134                matchlength != strsrch.getMatchLength()) {
1135                errln("Text: " + search.text);
1136                errln("Pattern: " + strsrch.getPattern());
1137                errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1138                return;
1139            }
1140            count++;
1141
1142            matchtext = strsrch.getMatchedText();
1143            if (matchtext.length() != matchlength){
1144                errln("Error getting match text");
1145            }
1146            matchindex = search.offset[count];
1147        }
1148        strsrch.next();
1149        if (strsrch.getMatchStart()  != StringSearch.DONE ||
1150            strsrch.getMatchLength() != 0) {
1151            errln("Error end of match not found");
1152        }
1153        matchtext = strsrch.getMatchedText();
1154        if (matchtext != null) {
1155            errln("Error getting null matches");
1156        }
1157    }
1158
1159    @Test
1160    public void TestGetSetAttribute() {
1161        String  pattern = "pattern";
1162        String  text = "text";
1163        StringSearch  strsrch = null;
1164        try {
1165            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1166        } catch (Exception e) {
1167            errln("Error opening search");
1168            return;
1169        }
1170
1171        if (strsrch.isOverlapping()) {
1172            errln("Error default overlaping should be false");
1173        }
1174        strsrch.setOverlapping(true);
1175        if (!strsrch.isOverlapping()) {
1176            errln("Error setting overlap true");
1177        }
1178        strsrch.setOverlapping(false);
1179        if (strsrch.isOverlapping()) {
1180            errln("Error setting overlap false");
1181        }
1182
1183        strsrch.setCanonical(true);
1184        if (!strsrch.isCanonical()) {
1185            errln("Error setting canonical match true");
1186        }
1187        strsrch.setCanonical(false);
1188        if (strsrch.isCanonical()) {
1189            errln("Error setting canonical match false");
1190        }
1191
1192        if (strsrch.getElementComparisonType() != STANDARD_ELEMENT_COMPARISON) {
1193            errln("Error default element comparison type should be STANDARD_ELEMENT_COMPARISON");
1194        }
1195        strsrch.setElementComparisonType(ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD);
1196        if (strsrch.getElementComparisonType() != ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD) {
1197            errln("Error setting element comparison type PATTERN_BASE_WEIGHT_IS_WILDCARD");
1198        }
1199    }
1200
1201    @Test
1202    public void TestGetSetOffset() {
1203        String  pattern = "1234567890123456";
1204        String  text  = "12345678901234567890123456789012";
1205        StringSearch  strsrch = null;
1206        try {
1207            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1208        } catch (Exception e) {
1209            errln("Error opening search");
1210
1211            return;
1212        }
1213
1214        /* testing out of bounds error */
1215        try {
1216            strsrch.setIndex(-1);
1217            errln("Error expecting set offset error");
1218        } catch (IndexOutOfBoundsException e) {
1219            logln("PASS: strsrch.setIndex(-1) failed as expected");
1220        }
1221
1222        try {
1223            strsrch.setIndex(128);
1224            errln("Error expecting set offset error");
1225        } catch (IndexOutOfBoundsException e) {
1226            logln("PASS: strsrch.setIndex(128) failed as expected");
1227        }
1228
1229        for (int index = 0; index < BASIC.length; index++) {
1230            SearchData  search      = BASIC[index];
1231
1232            text =search.text;
1233            pattern = search.pattern;
1234            strsrch.setTarget(new StringCharacterIterator(text));
1235            strsrch.setPattern(pattern);
1236            strsrch.getCollator().setStrength(search.strength);
1237            strsrch.reset();
1238
1239            int count = 0;
1240            int matchindex  = search.offset[count];
1241
1242            while (matchindex >= 0) {
1243                int matchlength = search.size[count];
1244                strsrch.next();
1245                if (matchindex != strsrch.getMatchStart() ||
1246                    matchlength != strsrch.getMatchLength()) {
1247                    errln("Text: " + text);
1248                    errln("Pattern: " + strsrch.getPattern());
1249                    errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1250                    return;
1251                }
1252                matchindex = search.offset[count + 1] == -1 ? -1 :
1253                             search.offset[count + 2];
1254                if (search.offset[count + 1] != -1) {
1255                    strsrch.setIndex(search.offset[count + 1] + 1);
1256                    if (strsrch.getIndex() != search.offset[count + 1] + 1) {
1257                        errln("Error setting offset\n");
1258                        return;
1259                    }
1260                }
1261
1262                count += 2;
1263            }
1264            strsrch.next();
1265            if (strsrch.getMatchStart() != StringSearch.DONE) {
1266                errln("Text: " + text);
1267                errln("Pattern: " + strsrch.getPattern());
1268                errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1269                return;
1270            }
1271        }
1272        strsrch.getCollator().setStrength(TERTIARY);
1273    }
1274
1275    @Test
1276    public void TestGetSetOffsetCanonical() {
1277
1278        String  text = "text";
1279        String  pattern = "pattern";
1280        StringSearch  strsrch = null;
1281        try {
1282            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1283        } catch (Exception e) {
1284            errln("Fail to open StringSearch!");
1285            return;
1286        }
1287        strsrch.setCanonical(true);
1288        //TODO: setCanonical is not sufficient for canonical match. See #10725
1289        strsrch.getCollator().setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1290        /* testing out of bounds error */
1291        try {
1292            strsrch.setIndex(-1);
1293            errln("Error expecting set offset error");
1294        } catch (IndexOutOfBoundsException e) {
1295            logln("PASS: strsrch.setIndex(-1) failed as expected");
1296        }
1297        try {
1298            strsrch.setIndex(128);
1299            errln("Error expecting set offset error");
1300        } catch (IndexOutOfBoundsException e) {
1301            logln("PASS: strsrch.setIndex(128) failed as expected");
1302        }
1303
1304        for (int index = 0; index < BASICCANONICAL.length; index++) {
1305            SearchData  search      = BASICCANONICAL[index];
1306            text = search.text;
1307            pattern = search.pattern;
1308            strsrch.setTarget(new StringCharacterIterator(text));
1309            strsrch.setPattern(pattern);
1310            int         count       = 0;
1311            int matchindex  = search.offset[count];
1312            while (matchindex >= 0) {
1313                int matchlength = search.size[count];
1314                strsrch.next();
1315                if (matchindex != strsrch.getMatchStart() ||
1316                    matchlength != strsrch.getMatchLength()) {
1317                    errln("Text: " + text);
1318                    errln("Pattern: " + strsrch.getPattern());
1319                    errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1320                    return;
1321                }
1322                matchindex = search.offset[count + 1] == -1 ? -1 :
1323                             search.offset[count + 2];
1324                if (search.offset[count + 1] != -1) {
1325                    strsrch.setIndex(search.offset[count + 1] + 1);
1326                    if (strsrch.getIndex() != search.offset[count + 1] + 1) {
1327                        errln("Error setting offset");
1328                        return;
1329                    }
1330                }
1331
1332                count += 2;
1333            }
1334            strsrch.next();
1335            if (strsrch.getMatchStart() != StringSearch.DONE) {
1336                errln("Text: " + text);
1337                errln("Pattern: %s" + strsrch.getPattern());
1338                errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1339                return;
1340            }
1341        }
1342        strsrch.getCollator().setStrength(TERTIARY);
1343        strsrch.getCollator().setDecomposition(Collator.NO_DECOMPOSITION);
1344    }
1345
1346    @Test
1347    public void TestIgnorable() {
1348        String rules = IGNORABLERULE;
1349        int        count  = 0;
1350        RuleBasedCollator collator = null;
1351        try {
1352            collator = new RuleBasedCollator(rules);
1353            collator.setStrength(IGNORABLE[count].strength);
1354            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1355        } catch (Exception e) {
1356            errln("Error opening collator ");
1357            return;
1358        }
1359        String pattern = "pattern";
1360        String text = "text";
1361        StringSearch strsrch = null;
1362        try {
1363            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1364        } catch (Exception e) {
1365            errln("Error opening string search ");
1366            return;
1367        }
1368
1369        for (; count < IGNORABLE.length; count++) {
1370            text = IGNORABLE[count].text;
1371            pattern = IGNORABLE[count].pattern;
1372            strsrch.setTarget(new StringCharacterIterator(text));
1373            strsrch.setPattern(pattern);
1374            if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) {
1375                errln("Error at test number " + count);
1376            }
1377        }
1378    }
1379
1380    @Test
1381    public void TestInitialization() {
1382        String  pattern;
1383        String  text;
1384        String  temp = "a";
1385        StringSearch  result;
1386
1387        /* simple test on the pattern ce construction */
1388        pattern = temp + temp;
1389        text = temp + temp + temp;
1390        try {
1391            result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1392        } catch (Exception e) {
1393            errln("Error opening search ");
1394            return;
1395        }
1396
1397        /* testing if an extremely large pattern will fail the initialization */
1398        pattern = "";
1399        for (int count = 0; count < 512; count ++) {
1400            pattern += temp;
1401        }
1402        try {
1403            result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1404            logln("pattern:" + result.getPattern());
1405        } catch (Exception e) {
1406            errln("Fail: an extremely large pattern will fail the initialization");
1407            return;
1408        }
1409    }
1410
1411    @Test
1412    public void TestNormCanonical() {
1413        m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1414        for (int count = 0; count < NORMCANONICAL.length; count++) {
1415            if (!assertCanonicalEqual(NORMCANONICAL[count])) {
1416                errln("Error at test number " + count);
1417            }
1418        }
1419        m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
1420    }
1421
1422    @Test
1423    public void TestNormExact() {
1424        int count;
1425
1426        m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1427        for (count = 0; count < BASIC.length; count++) {
1428            if (!assertEqual(BASIC[count])) {
1429                errln("Error at test number " + count);
1430            }
1431        }
1432        for (count = 0; count < NORMEXACT.length; count++) {
1433            if (!assertEqual(NORMEXACT[count])) {
1434                errln("Error at test number " + count);
1435            }
1436        }
1437        m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
1438        for (count = 0; count < NONNORMEXACT.length; count++) {
1439            if (!assertEqual(NONNORMEXACT[count])) {
1440                errln("Error at test number " + count);
1441            }
1442        }
1443    }
1444
1445    @Test
1446    public void TestOpenClose() {
1447        StringSearch            result;
1448        BreakIterator           breakiter = m_en_wordbreaker_;
1449        String           pattern = "";
1450        String           text = "";
1451        String           temp  = "a";
1452        StringCharacterIterator  chariter= new StringCharacterIterator(text);
1453
1454        /* testing null arguments */
1455        try {
1456            result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1457            errln("Error: null arguments should produce an error");
1458        } catch (Exception e) {
1459            logln("PASS: null arguments failed as expected");
1460        }
1461
1462        chariter.setText(text);
1463        try {
1464            result = new StringSearch(pattern, chariter, null, null);
1465            errln("Error: null arguments should produce an error");
1466        } catch (Exception e) {
1467            logln("PASS: null arguments failed as expected");
1468        }
1469
1470        text  = String.valueOf(0x1);
1471        try {
1472            result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1473            errln("Error: Empty pattern should produce an error");
1474        } catch (Exception e) {
1475            logln("PASS: Empty pattern failed as expected");
1476        }
1477
1478        chariter.setText(text);
1479        try {
1480            result = new StringSearch(pattern, chariter, null, null);
1481            errln("Error: Empty pattern should produce an error");
1482        } catch (Exception e) {
1483            logln("PASS: Empty pattern failed as expected");
1484        }
1485
1486        text = "";
1487        pattern =temp;
1488        try {
1489            result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1490            errln("Error: Empty text should produce an error");
1491        } catch (Exception e) {
1492            logln("PASS: Empty text failed as expected");
1493        }
1494
1495        chariter.setText(text);
1496        try {
1497            result = new StringSearch(pattern, chariter, null, null);
1498            errln("Error: Empty text should produce an error");
1499        } catch (Exception e) {
1500            logln("PASS: Empty text failed as expected");
1501        }
1502
1503        text += temp;
1504        try {
1505            result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1506            errln("Error: null arguments should produce an error");
1507        } catch (Exception e) {
1508            logln("PASS: null arguments failed as expected");
1509        }
1510
1511        chariter.setText(text);
1512        try {
1513            result = new StringSearch(pattern, chariter, null, null);
1514            errln("Error: null arguments should produce an error");
1515        } catch (Exception e) {
1516            logln("PASS: null arguments failed as expected");
1517        }
1518
1519        try {
1520            result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1521        } catch (Exception e) {
1522            errln("Error: null break iterator is valid for opening search");
1523        }
1524
1525        try {
1526            result = new StringSearch(pattern, chariter, m_en_us_, null);
1527        } catch (Exception e) {
1528            errln("Error: null break iterator is valid for opening search");
1529        }
1530
1531        try {
1532            result = new StringSearch(pattern, new StringCharacterIterator(text), Locale.ENGLISH);
1533        } catch (Exception e) {
1534            errln("Error: null break iterator is valid for opening search");
1535        }
1536
1537        try {
1538            result = new StringSearch(pattern, chariter, Locale.ENGLISH);
1539        } catch (Exception e) {
1540            errln("Error: null break iterator is valid for opening search");
1541        }
1542
1543        try {
1544            result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, breakiter);
1545        } catch (Exception e) {
1546            errln("Error: Break iterator is valid for opening search");
1547        }
1548
1549        try {
1550            result = new StringSearch(pattern, chariter, m_en_us_, null);
1551            logln("pattern:" + result.getPattern());
1552        } catch (Exception e) {
1553            errln("Error: Break iterator is valid for opening search");
1554        }
1555    }
1556
1557    @Test
1558    public void TestOverlap() {
1559        int count;
1560
1561        for (count = 0; count < OVERLAP.length; count++) {
1562            if (!assertEqualWithAttribute(OVERLAP[count], false, true)) {
1563                errln("Error at overlap test number " + count);
1564            }
1565        }
1566
1567        for (count = 0; count < NONOVERLAP.length; count++) {
1568            if (!assertEqual(NONOVERLAP[count])) {
1569                errln("Error at non overlap test number " + count);
1570            }
1571        }
1572
1573        for (count = 0; count < OVERLAP.length && count < NONOVERLAP.length; count++) {
1574            SearchData search = (OVERLAP[count]);
1575            String text = search.text;
1576            String pattern = search.pattern;
1577
1578            RuleBasedCollator collator = getCollator(search.collator);
1579            StringSearch strsrch = null;
1580            try {
1581                strsrch  = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1582            } catch (Exception e) {
1583                errln("error open StringSearch");
1584                return;
1585            }
1586
1587            strsrch.setOverlapping(true);
1588            if (!strsrch.isOverlapping()) {
1589                errln("Error setting overlap option");
1590            }
1591            if (!assertEqualWithStringSearch(strsrch, search)) {
1592                return;
1593            }
1594
1595            search = NONOVERLAP[count];
1596            strsrch.setOverlapping(false);
1597            if (strsrch.isOverlapping()) {
1598                errln("Error setting overlap option");
1599            }
1600            strsrch.reset();
1601            if (!assertEqualWithStringSearch(strsrch, search)) {
1602                errln("Error at test number " + count);
1603             }
1604        }
1605    }
1606
1607    @Test
1608    public void TestOverlapCanonical() {
1609        int count;
1610
1611        for (count = 0; count < OVERLAPCANONICAL.length; count++) {
1612            if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true, true)) {
1613                errln("Error at overlap test number %d" + count);
1614            }
1615        }
1616
1617        for (count = 0; count < NONOVERLAP.length; count++) {
1618            if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) {
1619                errln("Error at non overlap test number %d" + count);
1620            }
1621        }
1622
1623        for (count = 0; count < OVERLAPCANONICAL.length && count < NONOVERLAPCANONICAL.length; count++) {
1624            SearchData search = OVERLAPCANONICAL[count];
1625            RuleBasedCollator collator = getCollator(search.collator);
1626            StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), collator, null);
1627            strsrch.setCanonical(true);
1628            strsrch.setOverlapping(true);
1629            if (strsrch.isOverlapping() != true) {
1630                errln("Error setting overlap option");
1631            }
1632            if (!assertEqualWithStringSearch(strsrch, search)) {
1633                strsrch = null;
1634                return;
1635            }
1636            search = NONOVERLAPCANONICAL[count];
1637            strsrch.setOverlapping(false);
1638            if (strsrch.isOverlapping() != false) {
1639                errln("Error setting overlap option");
1640            }
1641            strsrch.reset();
1642            if (!assertEqualWithStringSearch(strsrch, search)) {
1643                strsrch = null;
1644                errln("Error at test number %d" + count);
1645             }
1646        }
1647    }
1648
1649    @Test
1650    public void TestPattern() {
1651        m_en_us_.setStrength(PATTERN[0].strength);
1652        StringSearch strsrch = new StringSearch(PATTERN[0].pattern, new StringCharacterIterator(PATTERN[0].text), m_en_us_, null);
1653
1654        if (strsrch.getPattern() != PATTERN[0].pattern) {
1655            errln("Error setting pattern");
1656        }
1657        if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
1658            m_en_us_.setStrength(TERTIARY);
1659            if (strsrch != null) {
1660                strsrch = null;
1661            }
1662            return;
1663        }
1664
1665        strsrch.setPattern(PATTERN[1].pattern);
1666        if (PATTERN[1].pattern != strsrch.getPattern()) {
1667            errln("Error setting pattern");
1668            m_en_us_.setStrength(TERTIARY);
1669            if (strsrch != null) {
1670                strsrch = null;
1671            }
1672            return;
1673        }
1674        strsrch.reset();
1675
1676        if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) {
1677            m_en_us_.setStrength(TERTIARY);
1678            if (strsrch != null) {
1679                strsrch = null;
1680            }
1681            return;
1682        }
1683
1684        strsrch.setPattern(PATTERN[0].pattern);
1685        if (PATTERN[0].pattern != strsrch.getPattern()) {
1686            errln("Error setting pattern");
1687            m_en_us_.setStrength(TERTIARY);
1688            if (strsrch != null) {
1689                strsrch = null;
1690            }
1691            return;
1692        }
1693            strsrch.reset();
1694
1695        if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
1696            m_en_us_.setStrength(TERTIARY);
1697            if (strsrch != null) {
1698                strsrch = null;
1699            }
1700            return;
1701        }
1702        /* enormous pattern size to see if this crashes */
1703        String pattern = "";
1704        for (int templength = 0; templength != 512; templength ++) {
1705            pattern += 0x61;
1706        }
1707        try{
1708            strsrch.setPattern(pattern);
1709        }catch(Exception e) {
1710            errln("Error setting pattern with size 512");
1711        }
1712
1713        m_en_us_.setStrength(TERTIARY);
1714        if (strsrch != null) {
1715            strsrch = null;
1716        }
1717    }
1718
1719    @Test
1720    public void TestPatternCanonical() {
1721        //StringCharacterIterator text = new StringCharacterIterator(PATTERNCANONICAL[0].text);
1722        m_en_us_.setStrength(PATTERNCANONICAL[0].strength);
1723        StringSearch strsrch = new StringSearch(PATTERNCANONICAL[0].pattern, new StringCharacterIterator(PATTERNCANONICAL[0].text),
1724                                                m_en_us_, null);
1725        strsrch.setCanonical(true);
1726
1727        if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
1728            errln("Error setting pattern");
1729        }
1730        if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
1731            m_en_us_.setStrength(TERTIARY);
1732            strsrch = null;
1733            return;
1734        }
1735
1736        strsrch.setPattern(PATTERNCANONICAL[1].pattern);
1737        if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) {
1738            errln("Error setting pattern");
1739            m_en_us_.setStrength(TERTIARY);
1740            strsrch = null;
1741            return;
1742        }
1743        strsrch.reset();
1744        strsrch.setCanonical(true);
1745
1746        if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) {
1747            m_en_us_.setStrength(TERTIARY);
1748            strsrch = null;
1749            return;
1750        }
1751
1752        strsrch.setPattern(PATTERNCANONICAL[0].pattern);
1753        if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
1754            errln("Error setting pattern");
1755            m_en_us_.setStrength(TERTIARY);
1756            strsrch = null;
1757            return;
1758        }
1759
1760        strsrch.reset();
1761        strsrch.setCanonical(true);
1762        if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
1763            m_en_us_.setStrength(TERTIARY);
1764            strsrch = null;
1765            return;
1766        }
1767    }
1768
1769    @Test
1770    public void TestReset() {
1771        StringCharacterIterator text = new StringCharacterIterator("fish fish");
1772        String pattern = "s";
1773
1774        StringSearch  strsrch = new StringSearch(pattern, text, m_en_us_, null);
1775        strsrch.setOverlapping(true);
1776        strsrch.setCanonical(true);
1777        strsrch.setIndex(9);
1778        strsrch.reset();
1779        if (strsrch.isCanonical() || strsrch.isOverlapping() ||
1780            strsrch.getIndex() != 0 || strsrch.getMatchLength() != 0 ||
1781            strsrch.getMatchStart() != SearchIterator.DONE) {
1782                errln("Error resetting string search");
1783        }
1784
1785        strsrch.previous();
1786        if (strsrch.getMatchStart() != 7 || strsrch.getMatchLength() != 1) {
1787            errln("Error resetting string search\n");
1788        }
1789    }
1790
1791    @Test
1792    public void TestSetMatch() {
1793        for (int count = 0; count < MATCH.length; count++) {
1794            SearchData     search = MATCH[count];
1795            StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text),
1796                                                    m_en_us_, null);
1797
1798            int size = 0;
1799            while (search.offset[size] != -1) {
1800                size ++;
1801            }
1802
1803            if (strsrch.first() != search.offset[0]) {
1804                errln("Error getting first match");
1805            }
1806            if (strsrch.last() != search.offset[size -1]) {
1807                errln("Error getting last match");
1808            }
1809
1810            int index = 0;
1811            while (index < size) {
1812                if (index + 2 < size) {
1813                    if (strsrch.following(search.offset[index + 2] - 1) != search.offset[index + 2]) {
1814                        errln("Error getting following match at index " + (search.offset[index + 2]-1));
1815                    }
1816                }
1817                if (index + 1 < size) {
1818                    if (strsrch.preceding(search.offset[index + 1] + search.size[index + 1] + 1) != search.offset[index + 1]) {
1819                        errln("Error getting preceeding match at index " + (search.offset[index + 1] + 1));
1820                    }
1821                }
1822                index += 2;
1823            }
1824
1825            if (strsrch.following(search.text.length()) != SearchIterator.DONE) {
1826                errln("Error expecting out of bounds match");
1827            }
1828            if (strsrch.preceding(0) != SearchIterator.DONE) {
1829                errln("Error expecting out of bounds match");
1830            }
1831        }
1832    }
1833
1834    @Test
1835    public void TestStrength() {
1836        for (int count = 0; count < STRENGTH.length; count++) {
1837            if (!assertEqual(STRENGTH[count])) {
1838                errln("Error at test number " + count);
1839            }
1840        }
1841    }
1842
1843    @Test
1844    public void TestStrengthCanonical() {
1845        for (int count = 0; count < STRENGTHCANONICAL.length; count++) {
1846            if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) {
1847                errln("Error at test number" + count);
1848            }
1849        }
1850    }
1851
1852    @Test
1853    public void TestSupplementary() {
1854        for (int count = 0; count < SUPPLEMENTARY.length; count++) {
1855            if (!assertEqual(SUPPLEMENTARY[count])) {
1856                errln("Error at test number " + count);
1857            }
1858        }
1859    }
1860
1861    @Test
1862    public void TestSupplementaryCanonical() {
1863        for (int count = 0; count < SUPPLEMENTARYCANONICAL.length; count++) {
1864            if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) {
1865                errln("Error at test number" + count);
1866            }
1867        }
1868    }
1869
1870    @Test
1871    public void TestText() {
1872        SearchData TEXT[] = {
1873            SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)),
1874            SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3))
1875        };
1876        StringCharacterIterator t = new StringCharacterIterator(TEXT[0].text);
1877        StringSearch strsrch = new StringSearch(TEXT[0].pattern, t, m_en_us_, null);
1878
1879        if (!t.equals(strsrch.getTarget())) {
1880            errln("Error setting text");
1881        }
1882        if (!assertEqualWithStringSearch(strsrch, TEXT[0])) {
1883            errln("Error at assertEqualWithStringSearch");
1884            return;
1885        }
1886
1887        t = new StringCharacterIterator(TEXT[1].text);
1888        strsrch.setTarget(t);
1889        if (!t.equals(strsrch.getTarget())) {
1890            errln("Error setting text");
1891            return;
1892        }
1893
1894        if (!assertEqualWithStringSearch(strsrch, TEXT[1])) {
1895            errln("Error at assertEqualWithStringSearch");
1896            return;
1897        }
1898    }
1899
1900    @Test
1901    public void TestTextCanonical() {
1902        StringCharacterIterator t = new StringCharacterIterator(TEXTCANONICAL[0].text);
1903        StringSearch strsrch = new StringSearch(TEXTCANONICAL[0].pattern, t, m_en_us_, null);
1904        strsrch.setCanonical(true);
1905
1906        if (!t.equals(strsrch.getTarget())) {
1907            errln("Error setting text");
1908        }
1909        if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
1910            strsrch = null;
1911            return;
1912        }
1913
1914        t = new StringCharacterIterator(TEXTCANONICAL[1].text);
1915        strsrch.setTarget(t);
1916        if (!t.equals(strsrch.getTarget())) {
1917            errln("Error setting text");
1918            strsrch = null;
1919            return;
1920        }
1921
1922        if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[1])) {
1923            strsrch = null;
1924            return;
1925        }
1926
1927        t = new StringCharacterIterator(TEXTCANONICAL[0].text);
1928        strsrch.setTarget(t);
1929        if (!t.equals(strsrch.getTarget())) {
1930            errln("Error setting text");
1931            strsrch = null;
1932            return;
1933        }
1934
1935        if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
1936            errln("Error at assertEqualWithStringSearch");
1937            strsrch = null;
1938            return;
1939        }
1940    }
1941
1942    @Test
1943    public void TestVariable() {
1944        m_en_us_.setAlternateHandlingShifted(true);
1945        for (int count = 0; count < VARIABLE.length; count++) {
1946            // logln("variable" + count);
1947            if (!assertEqual(VARIABLE[count])) {
1948                errln("Error at test number " + count);
1949            }
1950        }
1951        m_en_us_.setAlternateHandlingShifted(false);
1952    }
1953
1954    @Test
1955    public void TestVariableCanonical() {
1956        m_en_us_.setAlternateHandlingShifted(true);
1957        for (int count = 0; count < VARIABLE.length; count++) {
1958            // logln("variable " + count);
1959            if (!assertCanonicalEqual(VARIABLE[count])) {
1960                errln("Error at test number " + count);
1961            }
1962        }
1963        m_en_us_.setAlternateHandlingShifted(false);
1964    }
1965
1966    @Test
1967    public void TestSubClass()
1968    {
1969        class TestSearch extends SearchIterator
1970        {
1971            String pattern;
1972            String text;
1973
1974            TestSearch(StringCharacterIterator target, BreakIterator breaker,
1975                       String pattern)
1976            {
1977                super(target, breaker);
1978                this.pattern = pattern;
1979                StringBuffer buffer = new StringBuffer();
1980                while (targetText.getIndex() != targetText.getEndIndex()) {
1981                    buffer.append(targetText.current());
1982                    targetText.next();
1983                }
1984                text = buffer.toString();
1985                targetText.setIndex(targetText.getBeginIndex());
1986            }
1987            @Override
1988            protected int handleNext(int start)
1989            {
1990                int match = text.indexOf(pattern, start);
1991                if (match < 0) {
1992                    targetText.last();
1993                    return DONE;
1994                }
1995                targetText.setIndex(match);
1996                setMatchLength(pattern.length());
1997                return match;
1998            }
1999            @Override
2000            protected int handlePrevious(int start)
2001            {
2002                int match = text.lastIndexOf(pattern, start - 1);
2003                if (match < 0) {
2004                    targetText.setIndex(0);
2005                    return DONE;
2006                }
2007                targetText.setIndex(match);
2008                setMatchLength(pattern.length());
2009                return match;
2010            }
2011
2012            @Override
2013            public int getIndex()
2014            {
2015                int result = targetText.getIndex();
2016                if (result < 0 || result >= text.length()) {
2017                    return DONE;
2018                }
2019                return result;
2020            }
2021        }
2022
2023        TestSearch search = new TestSearch(
2024                            new StringCharacterIterator("abc abcd abc"),
2025                            null, "abc");
2026        int expected[] = {0, 4, 9};
2027        for (int i = 0; i < expected.length; i ++) {
2028            if (search.next() != expected[i]) {
2029                errln("Error getting next match");
2030            }
2031            if (search.getMatchLength() != search.pattern.length()) {
2032                errln("Error getting next match length");
2033            }
2034        }
2035        if (search.next() != SearchIterator.DONE) {
2036            errln("Error should have reached the end of the iteration");
2037        }
2038        for (int i = expected.length - 1; i >= 0; i --) {
2039            if (search.previous() != expected[i]) {
2040                errln("Error getting next match");
2041            }
2042            if (search.getMatchLength() != search.pattern.length()) {
2043                errln("Error getting next match length");
2044            }
2045        }
2046        if (search.previous() != SearchIterator.DONE) {
2047            errln("Error should have reached the start of the iteration");
2048        }
2049    }
2050
2051    //Test for ticket 5024
2052    @Test
2053    public void TestDiactricMatch() {
2054        String pattern = "pattern";
2055        String text = "text";
2056        StringSearch strsrch = null;
2057        try {
2058            strsrch = new StringSearch(pattern, text);
2059        } catch (Exception e) {
2060            errln("Error opening string search ");
2061            return;
2062        }
2063
2064        for (int count = 0; count < DIACTRICMATCH.length; count++) {
2065            strsrch.setCollator(getCollator(DIACTRICMATCH[count].collator));
2066            strsrch.getCollator().setStrength(DIACTRICMATCH[count].strength);
2067            strsrch.setBreakIterator(getBreakIterator(DIACTRICMATCH[count].breaker));
2068            strsrch.reset();
2069            text = DIACTRICMATCH[count].text;
2070            pattern = DIACTRICMATCH[count].pattern;
2071            strsrch.setTarget(new StringCharacterIterator(text));
2072            strsrch.setPattern(pattern);
2073            if (!assertEqualWithStringSearch(strsrch, DIACTRICMATCH[count])) {
2074                errln("Error at test number " + count);
2075            }
2076        }
2077    }
2078
2079    @Test
2080    public void TestUsingSearchCollator() {
2081        String scKoText =
2082            " " +
2083    /*01*/  "\uAC00 " +                   // simple LV Hangul
2084    /*03*/  "\uAC01 " +                   // simple LVT Hangul
2085    /*05*/  "\uAC0F " +                   // LVTT, last jamo expands for search
2086    /*07*/  "\uAFFF " +                   // LLVVVTT, every jamo expands for search
2087    /*09*/  "\u1100\u1161\u11A8 " +       // 0xAC01 as conjoining jamo
2088    /*13*/  "\u1100\u1161\u1100 " +       // 0xAC01 as basic conjoining jamo (per search rules)
2089    /*17*/  "\u3131\u314F\u3131 " +       // 0xAC01 as compatibility jamo
2090    /*21*/  "\u1100\u1161\u11B6 " +       // 0xAC0F as conjoining jamo; last expands for search
2091    /*25*/  "\u1100\u1161\u1105\u1112 " + // 0xAC0F as basic conjoining jamo; last expands for search
2092    /*30*/  "\u1101\u1170\u11B6 " +       // 0xAFFF as conjoining jamo; all expand for search
2093    /*34*/  "\u00E6 " +                   // small letter ae, expands
2094    /*36*/  "\u1E4D " +                   // small letter o with tilde and acute, decomposes
2095            "";
2096
2097        String scKoPat0 = "\uAC01";
2098        String scKoPat1 = "\u1100\u1161\u11A8"; // 0xAC01 as conjoining jamo
2099        String scKoPat2 = "\uAC0F";
2100        String scKoPat3 = "\u1100\u1161\u1105\u1112"; // 0xAC0F as basic conjoining jamo
2101        String scKoPat4 = "\uAFFF";
2102        String scKoPat5 = "\u1101\u1170\u11B6"; // 0xAFFF as conjoining jamo
2103
2104        int[] scKoSrchOff01 = { 3,  9, 13 };
2105        int[] scKoSrchOff23 = { 5, 21, 25 };
2106        int[] scKoSrchOff45 = { 7, 30     };
2107
2108        int[] scKoStndOff01 = { 3,  9 };
2109        int[] scKoStndOff2  = { 5, 21 };
2110        int[] scKoStndOff3  = { 25    };
2111        int[] scKoStndOff45 = { 7, 30 };
2112
2113        class PatternAndOffsets {
2114            private String pattern;
2115            private int[] offsets;
2116            PatternAndOffsets(String pat, int[] offs) {
2117                pattern = pat;
2118                offsets = offs;
2119            }
2120            public String getPattern() { return pattern; }
2121            public int[] getOffsets() { return offsets; }
2122        }
2123        final PatternAndOffsets[] scKoSrchPatternsOffsets = {
2124            new PatternAndOffsets( scKoPat0, scKoSrchOff01 ),
2125            new PatternAndOffsets( scKoPat1, scKoSrchOff01 ),
2126            new PatternAndOffsets( scKoPat2, scKoSrchOff23 ),
2127            new PatternAndOffsets( scKoPat3, scKoSrchOff23 ),
2128            new PatternAndOffsets( scKoPat4, scKoSrchOff45 ),
2129            new PatternAndOffsets( scKoPat5, scKoSrchOff45 ),
2130        };
2131        final PatternAndOffsets[] scKoStndPatternsOffsets = {
2132            new PatternAndOffsets( scKoPat0, scKoStndOff01 ),
2133            new PatternAndOffsets( scKoPat1, scKoStndOff01 ),
2134            new PatternAndOffsets( scKoPat2, scKoStndOff2  ),
2135            new PatternAndOffsets( scKoPat3, scKoStndOff3  ),
2136            new PatternAndOffsets( scKoPat4, scKoStndOff45 ),
2137            new PatternAndOffsets( scKoPat5, scKoStndOff45 ),
2138        };
2139
2140        class TUSCItem {
2141            private String localeString;
2142            private String text;
2143            private PatternAndOffsets[] patternsAndOffsets;
2144            TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) {
2145                localeString = locStr;
2146                text = txt;
2147                patternsAndOffsets = patsAndOffs;
2148            }
2149            public String getLocaleString() { return localeString; }
2150            public String getText() { return text; }
2151            public PatternAndOffsets[] getPatternsAndOffsets() { return patternsAndOffsets; }
2152        }
2153        final TUSCItem[] tuscItems = {
2154            new TUSCItem( "root",                  scKoText, scKoStndPatternsOffsets ),
2155            new TUSCItem( "root@collation=search", scKoText, scKoSrchPatternsOffsets ),
2156            new TUSCItem( "ko@collation=search",   scKoText, scKoSrchPatternsOffsets ),
2157        };
2158
2159        String dummyPat = "a";
2160
2161        for (TUSCItem tuscItem: tuscItems) {
2162            String localeString = tuscItem.getLocaleString();
2163            ULocale uloc = new ULocale(localeString);
2164            RuleBasedCollator col = null;
2165            try {
2166                col = (RuleBasedCollator)Collator.getInstance(uloc);
2167            } catch (Exception e) {
2168                errln("Error: in locale " + localeString + ", err in Collator.getInstance");
2169                continue;
2170            }
2171            StringCharacterIterator ci = new StringCharacterIterator(tuscItem.getText());
2172            StringSearch srch = new StringSearch(dummyPat, ci, col);
2173            for ( PatternAndOffsets patternAndOffsets: tuscItem.getPatternsAndOffsets() ) {
2174                srch.setPattern(patternAndOffsets.getPattern());
2175                int[] offsets = patternAndOffsets.getOffsets();
2176                int ioff, noff = offsets.length;
2177                int offset;
2178
2179                srch.reset();
2180                ioff = 0;
2181                while (true) {
2182                    offset = srch.next();
2183                    if (offset == SearchIterator.DONE) {
2184                        break;
2185                    }
2186                    if ( ioff < noff ) {
2187                        if ( offset != offsets[ioff] ) {
2188                            errln("Error: in locale " + localeString + ", expected SearchIterator.next() " + offsets[ioff] + ", got " + offset);
2189                            //ioff = noff;
2190                            //break;
2191                        }
2192                        ioff++;
2193                    } else {
2194                        errln("Error: in locale " + localeString + ", SearchIterator.next() returned more matches than expected");
2195                    }
2196                }
2197                if ( ioff < noff ) {
2198                    errln("Error: in locale " + localeString + ", SearchIterator.next() returned fewer matches than expected");
2199                }
2200
2201                srch.reset();
2202                ioff = noff;
2203                while (true) {
2204                    offset = srch.previous();
2205                    if (offset == SearchIterator.DONE) {
2206                        break;
2207                    }
2208                    if ( ioff > 0 ) {
2209                        ioff--;
2210                        if ( offset != offsets[ioff] ) {
2211                             errln("Error: in locale " + localeString + ", expected SearchIterator.previous() " + offsets[ioff] + ", got " + offset);
2212                            //ioff = 0;
2213                            // break;
2214                        }
2215                    } else {
2216                        errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned more matches than expected");
2217                    }
2218                }
2219                if ( ioff > 0 ) {
2220                    errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned fewer matches than expected");
2221                }
2222            }
2223        }
2224    }
2225
2226    @Test
2227    public void TestIndicPrefixMatch() {
2228        for (int count = 0; count < INDICPREFIXMATCH.length; count++) {
2229            if (!assertEqual(INDICPREFIXMATCH[count])) {
2230                errln("Error at test number" + count);
2231            }
2232        }
2233    }
2234
2235
2236    // Test case for ticket#12555
2237    @Test
2238    public void TestLongPattern() {
2239        StringBuilder pattern = new StringBuilder();
2240        for (int i = 0; i < 255; i++) {
2241            pattern.append('a');
2242        }
2243        // appends a character producing multiple ce32 at
2244        // index 256.
2245        pattern.append('á');
2246
2247        CharacterIterator target = new StringCharacterIterator("not important");
2248        try {
2249            StringSearch ss = new StringSearch(pattern.toString(), target, Locale.ENGLISH);
2250            assertNotNull("Non-null StringSearch instance", ss);
2251        } catch (Exception e) {
2252            errln("Error initializing a new StringSearch object");
2253        }
2254    }
2255}
2256