1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 2000-2015, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 */
9
10/**
11 * Port From:   ICU4C v2.1 : collate/StringSearchTest
12 * Source File: $ICU4CRoot/source/test/intltest/srchtest.cpp
13 **/
14
15package com.ibm.icu.dev.test.search;
16
17import static com.ibm.icu.text.Collator.IDENTICAL;
18import static com.ibm.icu.text.Collator.PRIMARY;
19import static com.ibm.icu.text.Collator.QUATERNARY;
20import static com.ibm.icu.text.Collator.SECONDARY;
21import static com.ibm.icu.text.Collator.TERTIARY;
22import static com.ibm.icu.text.SearchIterator.ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD;
23import static com.ibm.icu.text.SearchIterator.ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD;
24import static com.ibm.icu.text.SearchIterator.ElementComparisonType.STANDARD_ELEMENT_COMPARISON;
25
26import java.text.CharacterIterator;
27import java.text.StringCharacterIterator;
28import java.util.Locale;
29
30import org.junit.Before;
31import org.junit.Test;
32import org.junit.runner.RunWith;
33import org.junit.runners.JUnit4;
34
35import com.ibm.icu.dev.test.TestFmwk;
36import com.ibm.icu.text.BreakIterator;
37import com.ibm.icu.text.Collator;
38import com.ibm.icu.text.RuleBasedCollator;
39import com.ibm.icu.text.SearchIterator;
40import com.ibm.icu.text.SearchIterator.ElementComparisonType;
41import com.ibm.icu.text.StringSearch;
42import com.ibm.icu.util.ULocale;
43
44@RunWith(JUnit4.class)
45public class SearchTest extends TestFmwk {
46
47    //inner class
48    static class SearchData {
49        SearchData(String text, String pattern,
50                    String coll, int strength, ElementComparisonType cmpType, String breaker,
51                    int[] offset, int[] size) {
52            this.text = text;
53            this.pattern = pattern;
54            this.collator = coll;
55            this.strength = strength;
56            this.cmpType = cmpType;
57            this.breaker = breaker;
58            this.offset = offset;
59            this.size = size;
60        }
61        String              text;
62        String              pattern;
63        String              collator;
64        int                 strength;
65        ElementComparisonType   cmpType;
66        String              breaker;
67        int[]               offset;
68        int[]               size;
69    }
70
    // Collators kept as instance state; the name suffixes read as locale tags (en_US, fr_FR, de, es).
    // NOTE(review): none of these is assigned in the visible part of the file - presumably a setup
    // method outside this view initialises them; confirm.
    RuleBasedCollator m_en_us_;
    RuleBasedCollator m_fr_fr_;
    RuleBasedCollator m_de_;
    RuleBasedCollator m_es_;
    // Break iterators kept as instance state. The data tables name breakers by the strings
    // "wordbreaker" and "characterbreaker"; presumably those keys select these two fields - confirm.
    BreakIterator     m_en_wordbreaker_;
    BreakIterator     m_en_characterbreaker_;
77
78    // Just calling SearchData constructor, to make the test data source code
79    // nice and short
80    private static SearchData SD(String text, String pattern, String coll, int strength,
81                    ElementComparisonType cmpType, String breaker, int[] offset, int[] size) {
82        return new SearchData(text, pattern, coll, strength, cmpType, breaker, offset, size);
83    }
84
85    // Just returning int[], to make the test data nice and short
86    private static int[] IA(int... elements) {
87        return elements;
88    }
89
    /**
     * Baseline rows with no collator key and no breaker key; all use TERTIARY strength except the
     * last, which uses PRIMARY. Each row is SD(text, pattern, collator, strength, comparison type,
     * breaker, offsets, sizes). Every offset list ends with -1; the entries before it line up with
     * where the pattern occurs in the text (e.g. "string" at index 13 of "silly spring string") and
     * the size list gives the matching lengths. A list that is just -1 presumably means that no match
     * is expected; the code that consumes the table is not in view here.
     */
    static SearchData[] BASIC = {
        SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)),
        SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)),
        SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)),
        SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)),
        SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
        SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)),
        /* patterns made only of combining marks: no match listed */
        SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        /* PRIMARY strength: plain "e" is listed as matching U+00C9 */
        SD("\u00c9", "e", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
    };
104
    /**
     * Rows that name a breaker key ("characterbreaker" or "wordbreaker"). Paired rows show the
     * effect: "fox" in "foxy fox" is listed at 0 and 5 with the character breaker but only at 5 with
     * the word breaker. How a key is turned into a BreakIterator is decided by code outside this
     * view. Offset lists end with -1; sizes are the matching lengths.
     */
    SearchData BREAKITERATOREXACT[] = {
        SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)),
        SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)),
        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)),
        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)),
        SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)),
        /* jitterbug 1745 */
        SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)),
        SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)),
        SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)),
    };
116
    /**
     * Breaker-key rows; the contents are identical to BREAKITERATOREXACT. NOTE(review): presumably
     * this copy is run with canonical matching switched on - confirm in the test that reads it.
     */
    SearchData BREAKITERATORCANONICAL[] = {
        SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)),
        SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)),
        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)),
        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)),
        SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)),
        /* jitterbug 1745 */
        SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)),
        SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)),
        SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)),
    };
128
    /**
     * TERTIARY rows without collator or breaker key, in three groups: plain-text patterns, patterns
     * built from combining marks that list no match, and two rows where the pattern spells the same
     * marks as the text in a different order or composition and a match of length 2 at offset 0 is
     * listed. NOTE(review): presumably run with canonical matching switched on - confirm in the test
     * that reads it. Offset lists end with -1.
     */
    SearchData BASICCANONICAL[] = {
        /* plain text */
        SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)),
        SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)),
        SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)),
        SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)),
        SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
        SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)),

        /* combining-mark patterns: no match listed */
        SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325b", "\u0300b", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* same marks as the text, different order or composition: whole text listed as the match */
        SD("\u00c4\u0323", "A\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
        SD("\u0308\u0323", "\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
    };
150
    /**
     * Two rows over the same text and pattern that differ only in strength: at TERTIARY just "fox" at
     * offset 0 is listed, at PRIMARY "fpx" at offset 4 is listed as well. NOTE(review): the
     * "tailored" row presumably relies on a collator built from TESTCOLLATORRULE - confirm in the
     * test that reads this table.
     */
    SearchData COLLATOR[] = {
        /* english */
        SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
        /* tailored */
        SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)),
    };
157
    // Collation tailoring rule text. In ICU rule syntax ',' introduces a tertiary and ';' a secondary
    // difference, so this places p/P after o/O without a primary difference - which fits the
    // "tailored" COLLATOR row listing "fpx" for pattern "fox" at PRIMARY strength.
    // NOTE(review): the code that builds a collator from this string is outside this view.
    String TESTCOLLATORRULE = "& o,O ; p,P";
    // Further tailoring rule text: each umlaut letter is placed at a secondary difference (';') after
    // the corresponding two-letter sequence (ae, oe, ue and their upper-case forms). The leading
    // space suggests it is appended to other rule text - confirm at the point of use.
    String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; \u00fc & UE ; \u00dc";
160
    /**
     * Same two rows as COLLATOR: "fox" alone at TERTIARY, "fox" and "fpx" at PRIMARY.
     * NOTE(review): presumably run with canonical matching switched on, and the "tailored" row with
     * a collator built from TESTCOLLATORRULE - confirm in the test that reads this table.
     */
    SearchData COLLATORCANONICAL[] = {
        /* english */
        SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
        /* tailored */
        SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)),
    };
167
    /**
     * TERTIARY rows where the pattern is only a part of a precomposed character or combining sequence
     * in the text (for example "A" against U+00C0). Most rows list no match; a match is listed only
     * where the pattern covers a whole character or sequence. Offset lists end with -1; sizes are
     * the matching lengths.
     */
    SearchData COMPOSITEBOUNDARIES[] = {
        SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
        SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
        SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* first one matches only because it's at the start of the text */
        SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),

        /* U+0300 blocked by U+0300 */
        SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* A + 030A + 0301 */
        SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
        SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),

        SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* blocked accent */
        SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* 0F73 against itself, against its parts 0F71 and 0F72, and against both parts together */
        SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),

        SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),

        SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        /* only the stand-alone A + 030A at index 10 is listed */
        SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)),
    };
212
    /**
     * TERTIARY rows where the pattern is only a part of a precomposed character or combining sequence
     * in the text; row for row the same expectations as COMPOSITEBOUNDARIES. NOTE(review):
     * presumably run with canonical matching switched on - confirm in the test that reads it.
     */
    SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
        SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
        SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)),
        SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* first one matches only because it's at the start of the text */
        SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),

        /* U+0300 blocked by U+0300 */
        SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* A + 030A + 0301 */
        SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),
        SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),

        SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* blocked accent */
        SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* 0F73 against itself, against its parts 0F71 and 0F72, and against both parts together */
        SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),

        SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)),

        SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* only the stand-alone A + 030A at index 10 is listed */
        SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)),
    };
258
    /**
     * Rows whose patterns contain a supplementary character written as a surrogate pair, so each
     * such character counts as 2 in offsets and sizes. The text of the first row also holds unpaired
     * surrogates directly next to a pair (indices 25 and 31); the offsets listed there (26 and 29)
     * are those of the well-formed pairs. Offset lists end with -1.
     */
    SearchData SUPPLEMENTARY[] = {
        SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00",
                "\uD800\uDC00", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)),
        SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)),
        /* the same character with a space, hyphen, comma or question mark on both sides */
        SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    };
268
    // Collation tailoring rule text. It tailors the multi-character sequences ab, AB, ABC, X + U+0300
    // and X + U+0300 + U+0315; in ICU rule syntax "ab/c" additionally gives "ab" an expansion.
    // NOTE(review): presumably the collator for the CONTRACTION tables is built from this string -
    // the code doing so is outside this view.
    String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315";
270
    /**
     * TERTIARY rows for text that contains contractions (multi-character sequences such as "ab" or
     * X + U+0300, as the inline comments describe). Only one row lists a match; all others list
     * none. NOTE(review): the rows presumably assume a collator built from CONTRACTIONRULE - confirm
     * in the test that reads this table. Offset lists end with -1.
     */
    SearchData CONTRACTION[] = {
        /* common discontiguous */
        SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* contraction prefix */
        SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /*
         * discontiguous problem here for backwards iteration. accents not found because discontiguous stores all
         * information
         */
        SD("X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        /* ends not with a contraction character */
        SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        /* the one row with a match: pattern and text carry the same marks after X, in different order */
        SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
        SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        /* blocked discontiguous */
        SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /*
         * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the
         * match fails because it ends in the middle of an expansion...
         */
        SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    };
301
    /**
     * TERTIARY rows for text that contains contractions; the same cases as CONTRACTION except that
     * the X + U+0300 + U+0319 + U+0315 row is disabled (see the comment in the table). Only one row
     * lists a match. NOTE(review): presumably run with canonical matching switched on and with a
     * collator built from CONTRACTIONRULE - confirm in the test that reads this table.
     */
    SearchData CONTRACTIONCANONICAL[] = {
        /* common discontiguous */
        SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* contraction prefix */
        SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /*
         * Disabled row: discontiguous problem here for backwards iteration. Forwards gives 0, 4 but
         * backwards gives 1, 3.
         *
         * SD("X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(4)),
         */

        /* ends not with a contraction character */
        SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),

        SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /* blocked discontiguous */
        SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),

        /*
         * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the
         * match fails because it ends in the middle of an expansion...
         *
         * NOTE(review): the size list is IA(2) although no match is listed; the same row in CONTRACTION
         * uses IA(0). Presumably the size is never read for a row without matches - confirm.
         */
        SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(2)),
    };
335
    /**
     * Rows with several listed occurrences of "bee" in one text, at TERTIARY strength with no
     * collator or breaker key. The digit ruler in the comment lines up with the characters of the
     * text on the following line, which makes the offsets (7, 26, 40) easy to check by eye.
     */
    SearchData MATCH[] = {
        SD("a busy bee is a very busy beeee", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, -1), IA(3, 3)),
        /*  012345678901234567890123456789012345678901234567890 */
        SD("a busy bee is a very busy beeee with no bee life", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, 40, -1), IA(3, 3, 3)),
    };
341
    // Collation tailoring rule text: resets at "a" and tailors U+0300 with '=' relative to it.
    // NOTE(review): presumably the collator for the IGNORABLE table is built from this string - the
    // code doing so is outside this view.
    String IGNORABLERULE = "&a = \u0300";
343
    /**
     * Single PRIMARY-strength row searching for U+0300 in a text made of two U+0300 U+0315 pairs,
     * each followed by a space. Only the pair at offset 0 is listed, with length 2.
     */
    SearchData IGNORABLE[] = {
        /*
         * This isn't much of a test when matches have to be on grapheme boundaries. The match at 0 only works because
         * it's at the start of the text.
         */
        SD("\u0300\u0315 \u0300\u0315 ", "\u0300", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)),
    };
351
    /**
     * Rows whose texts mix precomposed accented letters with base letters followed by combining
     * marks, searched at SECONDARY strength (first row) or PRIMARY strength (the other two). The
     * listed sizes differ from match to match because the matching stretches of text are spelled
     * with different numbers of code units. Offset lists end with -1.
     */
    SearchData DIACTRICMATCH[] = {
        SD("\u0061\u0061\u00E1", "\u0061\u00E1", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(2)),
        SD("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020", "\u00C2\u0303",
            null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 4, 5, 6, 7, 10, 12, 13, 16, -1), IA(2, 1, 1, 1, 3, 2, 1, 3, 2)),
        /* Greek: the text ends its words with iota + 0300 and with precomposed 1F76; the pattern has a bare iota */
        SD("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 5, -1), IA(4, 3)),
    };
358
    /**
     * TERTIARY rows whose patterns consist only of the combining marks U+0300 and U+0325, searched in
     * texts holding those marks with or without a preceding "a". No row lists a match.
     * NOTE(review): presumably run with canonical matching switched on - confirm in the test that
     * reads this table.
     */
    SearchData NORMCANONICAL[] = {
        SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    };
367
    /**
     * Single TERTIARY row: the pattern is "a" with the marks U+0325 U+0300, the text is "a" with the
     * same two marks in the opposite order, and the whole text (offset 0, length 3) is listed as the
     * match. NOTE(review): presumably this needs normalization enabled on the collator - confirm in
     * the test that reads this table.
     */
    SearchData NORMEXACT[] = {
        SD("a\u0300\u0325", "a\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)),
    };
371
    /**
     * Single TERTIARY row: the pattern is the mark pair U+0325 U+0300 without a base letter, the text
     * is "a" followed by the same marks in the opposite order, and no match is listed.
     */
    SearchData NONNORMEXACT[] = {
        SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    };
375
    /**
     * Single row listing overlapping occurrences: "abab" in "abababab" at offsets 0, 2 and 4, where
     * each listed match shares two characters with the next one.
     */
    SearchData OVERLAP[] = {
        SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)),
    };
379
    /**
     * Single row listing only non-overlapping occurrences: "abab" in "abababab" at offsets 0 and 4;
     * the occurrence starting at 2, which would overlap both, is not listed.
     */
    SearchData NONOVERLAP[] = {
        SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)),
    };
383
    /**
     * Single row listing overlapping occurrences of "abab" in "abababab" (offsets 0, 2 and 4); the
     * same data as OVERLAP. NOTE(review): presumably run with canonical matching switched on -
     * confirm in the test that reads this table.
     */
    SearchData OVERLAPCANONICAL[] = {
        SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)),
    };
387
    /**
     * Single row listing only non-overlapping occurrences of "abab" in "abababab" (offsets 0 and 4);
     * the same data as NONOVERLAP. NOTE(review): presumably run with canonical matching switched on -
     * confirm in the test that reads this table.
     */
    SearchData NONOVERLAPCANONICAL[] = {
        SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)),
    };
391
    /**
     * Two PRIMARY-strength rows over one sentence with different patterns ("the", then "fox"). For
     * "the", the capitalised "The" at offset 0 is listed along with "the" at 31. The same data as
     * PATTERN. NOTE(review): presumably run with canonical matching switched on - confirm in the
     * test that reads this table.
     */
    SearchData PATTERNCANONICAL[] = {
        SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)),
        SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
    };
396
    /**
     * Two PRIMARY-strength rows over one sentence with different patterns ("the", then "fox"). For
     * "the", the capitalised "The" at offset 0 is listed along with "the" at 31; for "fox", the
     * occurrence inside "foxes" at 40 is listed along with the word "fox" at 16.
     */
    SearchData PATTERN[] = {
        SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)),
        SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
    };
401
    // Shared search text for the element-comparison rows of the STRENGTH table: ten comma-separated
    // phrases, each holding one accent variant of "peche". Nine use precomposed letters; the
    // "decomp" phrase spells its final accent as a separate combining U+0301, which is why that word
    // is 6 code units long instead of 5.
    String PECHE_WITH_ACCENTS = "un p\u00E9ch\u00E9, "
                                + "\u00E7a p\u00E8che par, "
                                + "p\u00E9cher, "
                                + "une p\u00EAche, "
                                + "un p\u00EAcher, "
                                + "j\u2019ai p\u00EAch\u00E9, "
                                + "un p\u00E9cheur, "
                                + "\u201Cp\u00E9che\u201D, "
                                + "decomp peche\u0301, "
                                + "base peche";
    // in the above, the interesting words and their offsets are:
    // (the number is the index of the leading "p"; <301> etc. name the accent carried by the
    //  preceding "e", and <+301> marks the accent that is a separate combining character)
    //    3 pe<301>che<301>
    //    13 pe<300>che
    //    24 pe<301>cher
    //    36 pe<302>che
    //    46 pe<302>cher
    //    59 pe<302>che<301>
    //    69 pe<301>cheur
    //    79 pe<301>che
    //    94 peche<+301>
    //    107 peche
423
    /**
     * Rows that vary the collator key, the strength, the element comparison type and the breaker key.
     * The first group uses ordinary sentences; the two ticket #7093 groups run accent variants of
     * "peche" against PECHE_WITH_ACCENTS, once with collator key "en" and once with "fr", and list
     * the same offsets and sizes for both keys. Offset lists end with -1; a size of 6 belongs to the
     * word at offset 94, whose final accent is a separate combining character.
     */
    SearchData STRENGTH[] = {
        /*  012345678901234567890123456789012345678901234567890123456789 */
        SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
        SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)),
        SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
                "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)),
        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)),
        SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)),
        SD("\u00c0 should match but not A", "A\u0300", "en", IDENTICAL, STANDARD_ELEMENT_COMPARISON,  null, IA(0, -1), IA(1, 0)),

        /* some tests for modified element comparison, ticket #7093 */
        SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)),
        SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
        SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
        SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),

        /* more tests for modified element comparison (with fr), ticket #7093 */
        SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)),
        SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)),
        SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)),
        SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)),
        SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)),

    };
477
    /*
     * Search cases that all run at PRIMARY strength against the "en", "fr",
     * "de" and "es" collator keys. Each entry lists: text, pattern, collator
     * key, strength, element comparison type, break iterator key (or null),
     * then two int lists. NOTE(review): the two lists appear to be the
     * expected match offsets (terminated by -1) and the matching lengths;
     * confirm against the SD/IA helpers, which are declared elsewhere.
     */
    SearchData STRENGTHCANONICAL[] = {
        /* column ruler for reading offsets in the text literals below */
        /*  012345678901234567890123456789012345678901234567890123456789 */
        SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)),
        SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)),
        SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
                "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)),
        SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)),
        SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)),
    };
487
    /*
     * Search cases whose text and pattern contain surrogate code units
     * (well-formed pairs as well as unpaired surrogates in the first entry).
     * All entries use the default collator key (null) at TERTIARY strength with
     * no break iterator. NOTE(review): the trailing int lists appear to be the
     * expected offsets (terminated by -1) and lengths, counted in UTF-16 code
     * units; confirm against the SD/IA helpers declared elsewhere.
     */
    SearchData SUPPLEMENTARYCANONICAL[] = {
        /* column ruler for reading offsets in the text literals below */
        /*  012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
        SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", "\uD800\uDC00",
            null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)),
        SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)),
        SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
        SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)),
    };
498
    /*
     * Search cases built around spaces and hyphens in the text or pattern, run
     * with the default collator key (null) at several strengths.
     * NOTE(review): the name suggests these exercise "variable" (ignorable)
     * characters, and the trailing int lists appear to be the expected offsets
     * (terminated by -1) and lengths; confirm against the test that consumes
     * this table and the SD/IA helpers, none of which are visible here.
     */
    static SearchData VARIABLE[] = {
        /* column ruler for reading offsets in the text literals below */
        /*  012345678901234567890123456789012345678901234567890123456789 */
        SD("blackbirds black blackbirds blackbird black-bird", "blackbird", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 17, 28, 38, -1), IA(9, 9, 9, 10)),

        /*
         * Checks that the search does not go into an infinite loop when the
         * text starts with an ignorable character.
         */
        SD(" on", "go", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
        /* pattern made only of spaces: a zero-length match is expected at every offset */
        SD("abcdefghijklmnopqrstuvwxyz", "   ",
            null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null,
            IA(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1),
            IA(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),

        /* testing tightest match */
        SD(" abc  a bc   ab c    a  bc     ab  c", "abc", null, QUATERNARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(3)),
        /*  012345678901234567890123456789012345678901234567890123456789 */
        SD(" abc  a bc   ab c    a  bc     ab  c", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 6, 13, 21, 31, -1), IA(3, 4, 4, 5, 5)),

        /* totally ignorable text */
        SD("           ---------------", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)),
    };
520
    /*
     * Two cases that search for the same pattern ("fox") in two different
     * texts, using the default collator key (null) at TERTIARY strength.
     * NOTE(review): presumably used to check that replacing the target text of
     * an existing search works; the consuming test is not visible here.
     */
    static SearchData TEXTCANONICAL[] = {
        SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)),
        SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)),
    };
525
    /*
     * PRIMARY-strength cases over Devanagari text (code points in the U+09xx
     * range, separated by U+0020 spaces) with the default collator key (null)
     * and no break iterator. The patterns are the consonant U+0915 alone or
     * followed by further letters, and the texts contain that sequence both on
     * its own and followed by additional signs.
     * NOTE(review): the trailing int lists appear to be the expected offsets
     * (terminated by -1) and lengths; confirm against the SD/IA helpers.
     */
    static SearchData INDICPREFIXMATCH[] = {
        SD("\u0915\u0020\u0915\u0901\u0020\u0915\u0902\u0020\u0915\u0903\u0020\u0915\u0940\u0020\u0915\u093F\u0020\u0915\u0943\u0020\u0915\u093C\u0020\u0958",
                "\u0915", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 5, 8, 11, 14, 17, 20, 23,-1), IA(1, 2, 2, 2, 1, 1, 1, 2, 1)),
        SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947",
                "\u0915\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 3, 7, 11, -1), IA(2, 2, 2, 2)),
        SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947",
                "\u0915\u0943\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 19, -1), IA(3, 3)),
    };
534
535    /**
536     * Constructor
537     */
538    public SearchTest()
539    {
540
541    }
542
543    @Before
544    public void init() throws Exception {
545        m_en_us_ = (RuleBasedCollator)Collator.getInstance(Locale.US);
546        m_fr_fr_ = (RuleBasedCollator)Collator.getInstance(Locale.FRANCE);
547        m_de_ = (RuleBasedCollator)Collator.getInstance(new Locale("de", "DE"));
548        m_es_ = (RuleBasedCollator)Collator.getInstance(new Locale("es", "ES"));
549        m_en_wordbreaker_ = BreakIterator.getWordInstance();
550        m_en_characterbreaker_ = BreakIterator.getCharacterInstance();
551        String rules = m_de_.getRules() + EXTRACOLLATIONRULE;
552        m_de_ = new RuleBasedCollator(rules);
553        rules = m_es_.getRules() + EXTRACOLLATIONRULE;
554        m_es_ = new RuleBasedCollator(rules);
555
556    }
557
558    RuleBasedCollator getCollator(String collator) {
559        if (collator == null) {
560            return m_en_us_;
561        } if (collator.equals("fr")) {
562            return m_fr_fr_;
563        } else if (collator.equals("de")) {
564            return m_de_;
565        } else if (collator.equals("es")) {
566            return m_es_;
567        } else {
568            return m_en_us_;
569        }
570    }
571
572    BreakIterator getBreakIterator(String breaker) {
573        if (breaker == null) {
574            return null;
575        } if (breaker.equals("wordbreaker")) {
576            return m_en_wordbreaker_;
577        } else {
578            return m_en_characterbreaker_;
579        }
580    }
581
582    boolean assertCanonicalEqual(SearchData search) {
583        Collator      collator = getCollator(search.collator);
584        BreakIterator breaker  = getBreakIterator(search.breaker);
585        StringSearch  strsrch;
586
587        String text = search.text;
588        String  pattern = search.pattern;
589
590        if (breaker != null) {
591            breaker.setText(text);
592        }
593        collator.setStrength(search.strength);
594        collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
595        try {
596            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
597            strsrch.setElementComparisonType(search.cmpType);
598            strsrch.setCanonical(true);
599        } catch (Exception e) {
600            errln("Error opening string search" + e.getMessage());
601            return false;
602        }
603
604        if (!assertEqualWithStringSearch(strsrch, search)) {
605            collator.setStrength(TERTIARY);
606            collator.setDecomposition(Collator.NO_DECOMPOSITION);
607            return false;
608        }
609        collator.setStrength(TERTIARY);
610        collator.setDecomposition(Collator.NO_DECOMPOSITION);
611        return true;
612    }
613
614    boolean assertEqual(SearchData search) {
615        Collator      collator = getCollator(search.collator);
616        BreakIterator breaker  = getBreakIterator(search.breaker);
617        StringSearch  strsrch;
618
619        String text = search.text;
620        String  pattern = search.pattern;
621
622        if (breaker != null) {
623            breaker.setText(text);
624        }
625        collator.setStrength(search.strength);
626        try {
627            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
628            strsrch.setElementComparisonType(search.cmpType);
629        } catch (Exception e) {
630            errln("Error opening string search " + e.getMessage());
631            return false;
632        }
633
634        if (!assertEqualWithStringSearch(strsrch, search)) {
635            collator.setStrength(TERTIARY);
636            return false;
637        }
638        collator.setStrength(TERTIARY);
639        return true;
640    }
641
642    boolean assertEqualWithAttribute(SearchData search, boolean canonical, boolean overlap) {
643        Collator      collator = getCollator(search.collator);
644        BreakIterator breaker  = getBreakIterator(search.breaker);
645        StringSearch  strsrch;
646
647        String text = search.text;
648        String  pattern = search.pattern;
649
650        if (breaker != null) {
651            breaker.setText(text);
652        }
653        collator.setStrength(search.strength);
654        try {
655            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker);
656            strsrch.setCanonical(canonical);
657            strsrch.setOverlapping(overlap);
658            strsrch.setElementComparisonType(search.cmpType);
659        } catch (Exception e) {
660            errln("Error opening string search " + e.getMessage());
661            return false;
662        }
663
664        if (!assertEqualWithStringSearch(strsrch, search)) {
665            collator.setStrength(TERTIARY);
666            return false;
667        }
668        collator.setStrength(TERTIARY);
669        return true;
670    }
671
672    boolean assertEqualWithStringSearch(StringSearch strsrch, SearchData search) {
673        int           count       = 0;
674        int   matchindex  = search.offset[count];
675        String matchtext;
676
677        if (strsrch.getMatchStart() != SearchIterator.DONE ||
678            strsrch.getMatchLength() != 0) {
679            errln("Error with the initialization of match start and length");
680        }
681        // start of following matches
682        while (matchindex >= 0) {
683            int matchlength = search.size[count];
684            strsrch.next();
685            //int x = strsrch.getMatchStart();
686            if (matchindex != strsrch.getMatchStart() ||
687                matchlength != strsrch.getMatchLength()) {
688                errln("Text: " + search.text);
689                errln("Searching forward for pattern: " + strsrch.getPattern());
690                errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
691                return false;
692            }
693            count ++;
694
695            matchtext = strsrch.getMatchedText();
696            String targetText = search.text;
697            if (matchlength > 0 &&
698                targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) {
699                errln("Error getting following matched text");
700            }
701
702            matchindex = search.offset[count];
703        }
704        strsrch.next();
705        if (strsrch.getMatchStart() != SearchIterator.DONE ||
706            strsrch.getMatchLength() != 0) {
707                errln("Text: " + search.text);
708                errln("Searching forward for pattern: " + strsrch.getPattern());
709                errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
710                return false;
711        }
712        // start of preceding matches
713        count = count == 0 ? 0 : count - 1;
714        matchindex = search.offset[count];
715        while (matchindex >= 0) {
716            int matchlength = search.size[count];
717            strsrch.previous();
718            if (matchindex != strsrch.getMatchStart() ||
719                matchlength != strsrch.getMatchLength()) {
720                errln("Text: " + search.text);
721                errln("Searching backward for pattern: " + strsrch.getPattern());
722                errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
723                return false;
724            }
725
726            matchtext = strsrch.getMatchedText();
727            String targetText = search.text;
728            if (matchlength > 0 &&
729                targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) {
730                errln("Error getting following matched text");
731            }
732
733            matchindex = count > 0 ? search.offset[count - 1] : -1;
734            count --;
735        }
736        strsrch.previous();
737        if (strsrch.getMatchStart() != SearchIterator.DONE ||
738            strsrch.getMatchLength() != 0) {
739                errln("Text: " + search.text);
740                errln("Searching backward for pattern: " + strsrch.getPattern());
741                errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
742                return false;
743        }
744        return true;
745    }
746
747    @Test
748    public void TestConstructor()
749    {
750        String pattern = "pattern";
751        String text = "text";
752        StringCharacterIterator textiter = new StringCharacterIterator(text);
753        Collator defaultcollator = Collator.getInstance();
754        BreakIterator breaker = BreakIterator.getCharacterInstance();
755        breaker.setText(text);
756        StringSearch search = new StringSearch(pattern, text);
757        if (!search.getPattern().equals(pattern)
758            || !search.getTarget().equals(textiter)
759            || !search.getCollator().equals(defaultcollator)
760            /*|| !search.getBreakIterator().equals(breaker)*/) {
761            errln("StringSearch(String, String) error");
762        }
763        search = new StringSearch(pattern, textiter, m_fr_fr_);
764        if (!search.getPattern().equals(pattern)
765            || !search.getTarget().equals(textiter)
766            || !search.getCollator().equals(m_fr_fr_)
767            /*|| !search.getBreakIterator().equals(breaker)*/) {
768            errln("StringSearch(String, StringCharacterIterator, "
769                  + "RuleBasedCollator) error");
770        }
771        Locale de = new Locale("de", "DE");
772        breaker = BreakIterator.getCharacterInstance(de);
773        breaker.setText(text);
774        search = new StringSearch(pattern, textiter, de);
775        if (!search.getPattern().equals(pattern)
776            || !search.getTarget().equals(textiter)
777            || !search.getCollator().equals(Collator.getInstance(de))
778            /*|| !search.getBreakIterator().equals(breaker)*/) {
779            errln("StringSearch(String, StringCharacterIterator, Locale) "
780                  + "error");
781        }
782
783        search = new StringSearch(pattern, textiter, m_fr_fr_,
784                                  m_en_wordbreaker_);
785        if (!search.getPattern().equals(pattern)
786            || !search.getTarget().equals(textiter)
787            || !search.getCollator().equals(m_fr_fr_)
788            || !search.getBreakIterator().equals(m_en_wordbreaker_)) {
789            errln("StringSearch(String, StringCharacterIterator, Locale) "
790                  + "error");
791        }
792    }
793
794    @Test
795    public void TestBasic() {
796        for (int count = 0; count < BASIC.length; count++) {
797            if (!assertEqual(BASIC[count])) {
798                errln("Error at test number " + count);
799            }
800        }
801    }
802
803    @Test
804    public void TestBreakIterator() {
805
806        String text = BREAKITERATOREXACT[0].text;
807        String pattern = BREAKITERATOREXACT[0].pattern;
808        StringSearch strsrch = null;
809        try {
810            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
811        } catch (Exception e) {
812            errln("Error opening string search");
813            return;
814        }
815
816        strsrch.setBreakIterator(null);
817        if (strsrch.getBreakIterator() != null) {
818            errln("Error usearch_getBreakIterator returned wrong object");
819        }
820
821        strsrch.setBreakIterator(m_en_characterbreaker_);
822        if (!strsrch.getBreakIterator().equals(m_en_characterbreaker_)) {
823            errln("Error usearch_getBreakIterator returned wrong object");
824        }
825
826        strsrch.setBreakIterator(m_en_wordbreaker_);
827        if (!strsrch.getBreakIterator().equals(m_en_wordbreaker_)) {
828            errln("Error usearch_getBreakIterator returned wrong object");
829        }
830
831        int count = 0;
832        while (count < 4) {
833            // special purposes for tests numbers 0-3
834            SearchData        search   = BREAKITERATOREXACT[count];
835            RuleBasedCollator collator = getCollator(search.collator);
836            BreakIterator     breaker  = getBreakIterator(search.breaker);
837                  //StringSearch      strsrch;
838
839            text = search.text;
840            pattern = search.pattern;
841            if (breaker != null) {
842                breaker.setText(text);
843            }
844            collator.setStrength(search.strength);
845            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker);
846            if (strsrch.getBreakIterator() != breaker) {
847                errln("Error setting break iterator");
848            }
849            if (!assertEqualWithStringSearch(strsrch, search)) {
850                collator.setStrength(TERTIARY);
851            }
852            search   = BREAKITERATOREXACT[count + 1];
853            breaker  = getBreakIterator(search.breaker);
854            if (breaker != null) {
855                breaker.setText(text);
856            }
857            strsrch.setBreakIterator(breaker);
858            if (strsrch.getBreakIterator() != breaker) {
859                errln("Error setting break iterator");
860            }
861            strsrch.reset();
862            if (!assertEqualWithStringSearch(strsrch, search)) {
863                 errln("Error at test number " + count);
864            }
865            count += 2;
866        }
867        for (count = 0; count < BREAKITERATOREXACT.length; count++) {
868            if (!assertEqual(BREAKITERATOREXACT[count])) {
869                errln("Error at test number " + count);
870            }
871        }
872    }
873
874    @Test
875    public void TestBreakIteratorCanonical() {
876        int        count  = 0;
877        while (count < 4) {
878            // special purposes for tests numbers 0-3
879            SearchData     search   = BREAKITERATORCANONICAL[count];
880
881            String text = search.text;
882            String pattern = search.pattern;
883            RuleBasedCollator collator = getCollator(search.collator);
884            collator.setStrength(search.strength);
885
886            BreakIterator breaker = getBreakIterator(search.breaker);
887            StringSearch  strsrch = null;
888            try {
889                strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker);
890            } catch (Exception e) {
891                errln("Error creating string search data");
892                return;
893            }
894            strsrch.setCanonical(true);
895            if (!strsrch.getBreakIterator().equals(breaker)) {
896                errln("Error setting break iterator");
897                return;
898            }
899            if (!assertEqualWithStringSearch(strsrch, search)) {
900                collator.setStrength(TERTIARY);
901                return;
902            }
903            search  = BREAKITERATOREXACT[count + 1];
904            breaker = getBreakIterator(search.breaker);
905            breaker.setText(strsrch.getTarget());
906            strsrch.setBreakIterator(breaker);
907            if (!strsrch.getBreakIterator().equals(breaker)) {
908                errln("Error setting break iterator");
909                return;
910            }
911            strsrch.reset();
912            strsrch.setCanonical(true);
913            if (!assertEqualWithStringSearch(strsrch, search)) {
914                 errln("Error at test number " + count);
915                 return;
916            }
917            count += 2;
918        }
919
920        for (count = 0; count < BREAKITERATORCANONICAL.length; count++) {
921             if (!assertEqual(BREAKITERATORCANONICAL[count])) {
922                 errln("Error at test number " + count);
923                 return;
924             }
925        }
926    }
927
928    @Test
929    public void TestCanonical() {
930        for (int count = 0; count < BASICCANONICAL.length; count++) {
931            if (!assertCanonicalEqual(BASICCANONICAL[count])) {
932                errln("Error at test number " + count);
933            }
934        }
935    }
936
937    @Test
938    public void TestCollator() {
939        // test collator that thinks "o" and "p" are the same thing
940        String text = COLLATOR[0].text;
941        String pattern  = COLLATOR[0].pattern;
942        StringSearch strsrch = null;
943        try {
944            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
945        } catch (Exception e) {
946            errln("Error opening string search ");
947            return;
948        }
949        if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
950            return;
951        }
952        String rules = TESTCOLLATORRULE;
953        RuleBasedCollator tailored = null;
954        try {
955            tailored = new RuleBasedCollator(rules);
956            tailored.setStrength(COLLATOR[1].strength);
957        } catch (Exception e) {
958            errln("Error opening rule based collator ");
959            return;
960        }
961
962        strsrch.setCollator(tailored);
963        if (!strsrch.getCollator().equals(tailored)) {
964            errln("Error setting rule based collator");
965        }
966        strsrch.reset();
967        if (!assertEqualWithStringSearch(strsrch, COLLATOR[1])) {
968            return;
969        }
970        strsrch.setCollator(m_en_us_);
971        strsrch.reset();
972        if (!strsrch.getCollator().equals(m_en_us_)) {
973            errln("Error setting rule based collator");
974        }
975        if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) {
976           errln("Error searching collator test");
977        }
978    }
979
980    @Test
981    public void TestCollatorCanonical() {
982        /* test collator that thinks "o" and "p" are the same thing */
983        String text = COLLATORCANONICAL[0].text;
984        String pattern = COLLATORCANONICAL[0].pattern;
985
986        StringSearch strsrch = null;
987        try {
988            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
989            strsrch.setCanonical(true);
990        } catch (Exception e) {
991            errln("Error opening string search ");
992        }
993
994        if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
995            return;
996        }
997
998        String rules = TESTCOLLATORRULE;
999        RuleBasedCollator tailored = null;
1000        try {
1001            tailored = new RuleBasedCollator(rules);
1002            tailored.setStrength(COLLATORCANONICAL[1].strength);
1003            tailored.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1004        } catch (Exception e) {
1005            errln("Error opening rule based collator ");
1006        }
1007
1008        strsrch.setCollator(tailored);
1009        if (!strsrch.getCollator().equals(tailored)) {
1010            errln("Error setting rule based collator");
1011        }
1012        strsrch.reset();
1013        strsrch.setCanonical(true);
1014        if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[1])) {
1015            logln("COLLATORCANONICAL[1] failed");  // Error should already be reported.
1016        }
1017        strsrch.setCollator(m_en_us_);
1018        strsrch.reset();
1019        if (!strsrch.getCollator().equals(m_en_us_)) {
1020            errln("Error setting rule based collator");
1021        }
1022        if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) {
1023            logln("COLLATORCANONICAL[0] failed");  // Error should already be reported.
1024        }
1025    }
1026
1027    @Test
1028    public void TestCompositeBoundaries() {
1029        for (int count = 0; count < COMPOSITEBOUNDARIES.length; count++) {
1030            // logln("composite " + count);
1031            if (!assertEqual(COMPOSITEBOUNDARIES[count])) {
1032                errln("Error at test number " + count);
1033            }
1034        }
1035    }
1036
1037    @Test
1038    public void TestCompositeBoundariesCanonical() {
1039        for (int count = 0; count < COMPOSITEBOUNDARIESCANONICAL.length; count++) {
1040            // logln("composite " + count);
1041            if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) {
1042                errln("Error at test number " + count);
1043            }
1044        }
1045    }
1046
1047    @Test
1048    public void TestContraction() {
1049        String rules = CONTRACTIONRULE;
1050        RuleBasedCollator collator = null;
1051        try {
1052            collator = new RuleBasedCollator(rules);
1053            collator.setStrength(TERTIARY);
1054            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1055        } catch (Exception e) {
1056            errln("Error opening collator ");
1057        }
1058        String text = "text";
1059        String pattern = "pattern";
1060        StringSearch strsrch = null;
1061        try {
1062            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1063        } catch (Exception e) {
1064            errln("Error opening string search ");
1065        }
1066
1067        for (int count = 0; count< CONTRACTION.length; count++) {
1068            text = CONTRACTION[count].text;
1069            pattern = CONTRACTION[count].pattern;
1070            strsrch.setTarget(new StringCharacterIterator(text));
1071            strsrch.setPattern(pattern);
1072            if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) {
1073                errln("Error at test number " + count);
1074            }
1075        }
1076    }
1077
1078    @Test
1079    public void TestContractionCanonical() {
1080        String rules = CONTRACTIONRULE;
1081        RuleBasedCollator collator = null;
1082        try {
1083            collator = new RuleBasedCollator(rules);
1084            collator.setStrength(TERTIARY);
1085            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1086        } catch (Exception e) {
1087            errln("Error opening collator ");
1088        }
1089        String text = "text";
1090        String pattern = "pattern";
1091        StringSearch strsrch = null;
1092        try {
1093            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1094            strsrch.setCanonical(true);
1095        } catch (Exception e) {
1096            errln("Error opening string search");
1097        }
1098
1099        for (int count = 0; count < CONTRACTIONCANONICAL.length; count++) {
1100            text = CONTRACTIONCANONICAL[count].text;
1101            pattern = CONTRACTIONCANONICAL[count].pattern;
1102            strsrch.setTarget(new StringCharacterIterator(text));
1103            strsrch.setPattern(pattern);
1104            if (!assertEqualWithStringSearch(strsrch, CONTRACTIONCANONICAL[count])) {
1105                errln("Error at test number " + count);
1106            }
1107        }
1108    }
1109
1110    @Test
1111    public void TestGetMatch() {
1112        SearchData search = MATCH[0];
1113        String text = search.text;
1114        String pattern = search.pattern;
1115
1116        StringSearch strsrch = null;
1117        try {
1118            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1119        } catch (Exception e) {
1120            errln("Error opening string search ");
1121            return;
1122        }
1123
1124        int           count      = 0;
1125        int   matchindex = search.offset[count];
1126        String matchtext;
1127        while (matchindex >= 0) {
1128            int matchlength = search.size[count];
1129            strsrch.next();
1130            if (matchindex != strsrch.getMatchStart() ||
1131                matchlength != strsrch.getMatchLength()) {
1132                errln("Text: " + search.text);
1133                errln("Pattern: " + strsrch.getPattern());
1134                errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1135                return;
1136            }
1137            count++;
1138
1139            matchtext = strsrch.getMatchedText();
1140            if (matchtext.length() != matchlength){
1141                errln("Error getting match text");
1142            }
1143            matchindex = search.offset[count];
1144        }
1145        strsrch.next();
1146        if (strsrch.getMatchStart()  != StringSearch.DONE ||
1147            strsrch.getMatchLength() != 0) {
1148            errln("Error end of match not found");
1149        }
1150        matchtext = strsrch.getMatchedText();
1151        if (matchtext != null) {
1152            errln("Error getting null matches");
1153        }
1154    }
1155
1156    @Test
1157    public void TestGetSetAttribute() {
1158        String  pattern = "pattern";
1159        String  text = "text";
1160        StringSearch  strsrch = null;
1161        try {
1162            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1163        } catch (Exception e) {
1164            errln("Error opening search");
1165            return;
1166        }
1167
1168        if (strsrch.isOverlapping()) {
1169            errln("Error default overlaping should be false");
1170        }
1171        strsrch.setOverlapping(true);
1172        if (!strsrch.isOverlapping()) {
1173            errln("Error setting overlap true");
1174        }
1175        strsrch.setOverlapping(false);
1176        if (strsrch.isOverlapping()) {
1177            errln("Error setting overlap false");
1178        }
1179
1180        strsrch.setCanonical(true);
1181        if (!strsrch.isCanonical()) {
1182            errln("Error setting canonical match true");
1183        }
1184        strsrch.setCanonical(false);
1185        if (strsrch.isCanonical()) {
1186            errln("Error setting canonical match false");
1187        }
1188
1189        if (strsrch.getElementComparisonType() != STANDARD_ELEMENT_COMPARISON) {
1190            errln("Error default element comparison type should be STANDARD_ELEMENT_COMPARISON");
1191        }
1192        strsrch.setElementComparisonType(ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD);
1193        if (strsrch.getElementComparisonType() != ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD) {
1194            errln("Error setting element comparison type PATTERN_BASE_WEIGHT_IS_WILDCARD");
1195        }
1196    }
1197
1198    @Test
1199    public void TestGetSetOffset() {
1200        String  pattern = "1234567890123456";
1201        String  text  = "12345678901234567890123456789012";
1202        StringSearch  strsrch = null;
1203        try {
1204            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1205        } catch (Exception e) {
1206            errln("Error opening search");
1207
1208            return;
1209        }
1210
1211        /* testing out of bounds error */
1212        try {
1213            strsrch.setIndex(-1);
1214            errln("Error expecting set offset error");
1215        } catch (IndexOutOfBoundsException e) {
1216            logln("PASS: strsrch.setIndex(-1) failed as expected");
1217        }
1218
1219        try {
1220            strsrch.setIndex(128);
1221            errln("Error expecting set offset error");
1222        } catch (IndexOutOfBoundsException e) {
1223            logln("PASS: strsrch.setIndex(128) failed as expected");
1224        }
1225
1226        for (int index = 0; index < BASIC.length; index++) {
1227            SearchData  search      = BASIC[index];
1228
1229            text =search.text;
1230            pattern = search.pattern;
1231            strsrch.setTarget(new StringCharacterIterator(text));
1232            strsrch.setPattern(pattern);
1233            strsrch.getCollator().setStrength(search.strength);
1234            strsrch.reset();
1235
1236            int count = 0;
1237            int matchindex  = search.offset[count];
1238
1239            while (matchindex >= 0) {
1240                int matchlength = search.size[count];
1241                strsrch.next();
1242                if (matchindex != strsrch.getMatchStart() ||
1243                    matchlength != strsrch.getMatchLength()) {
1244                    errln("Text: " + text);
1245                    errln("Pattern: " + strsrch.getPattern());
1246                    errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1247                    return;
1248                }
1249                matchindex = search.offset[count + 1] == -1 ? -1 :
1250                             search.offset[count + 2];
1251                if (search.offset[count + 1] != -1) {
1252                    strsrch.setIndex(search.offset[count + 1] + 1);
1253                    if (strsrch.getIndex() != search.offset[count + 1] + 1) {
1254                        errln("Error setting offset\n");
1255                        return;
1256                    }
1257                }
1258
1259                count += 2;
1260            }
1261            strsrch.next();
1262            if (strsrch.getMatchStart() != StringSearch.DONE) {
1263                errln("Text: " + text);
1264                errln("Pattern: " + strsrch.getPattern());
1265                errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1266                return;
1267            }
1268        }
1269        strsrch.getCollator().setStrength(TERTIARY);
1270    }
1271
1272    @Test
1273    public void TestGetSetOffsetCanonical() {
1274
1275        String  text = "text";
1276        String  pattern = "pattern";
1277        StringSearch  strsrch = null;
1278        try {
1279            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1280        } catch (Exception e) {
1281            errln("Fail to open StringSearch!");
1282            return;
1283        }
1284        strsrch.setCanonical(true);
1285        //TODO: setCanonical is not sufficient for canonical match. See #10725
1286        strsrch.getCollator().setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1287        /* testing out of bounds error */
1288        try {
1289            strsrch.setIndex(-1);
1290            errln("Error expecting set offset error");
1291        } catch (IndexOutOfBoundsException e) {
1292            logln("PASS: strsrch.setIndex(-1) failed as expected");
1293        }
1294        try {
1295            strsrch.setIndex(128);
1296            errln("Error expecting set offset error");
1297        } catch (IndexOutOfBoundsException e) {
1298            logln("PASS: strsrch.setIndex(128) failed as expected");
1299        }
1300
1301        for (int index = 0; index < BASICCANONICAL.length; index++) {
1302            SearchData  search      = BASICCANONICAL[index];
1303            text = search.text;
1304            pattern = search.pattern;
1305            strsrch.setTarget(new StringCharacterIterator(text));
1306            strsrch.setPattern(pattern);
1307            int         count       = 0;
1308            int matchindex  = search.offset[count];
1309            while (matchindex >= 0) {
1310                int matchlength = search.size[count];
1311                strsrch.next();
1312                if (matchindex != strsrch.getMatchStart() ||
1313                    matchlength != strsrch.getMatchLength()) {
1314                    errln("Text: " + text);
1315                    errln("Pattern: " + strsrch.getPattern());
1316                    errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1317                    return;
1318                }
1319                matchindex = search.offset[count + 1] == -1 ? -1 :
1320                             search.offset[count + 2];
1321                if (search.offset[count + 1] != -1) {
1322                    strsrch.setIndex(search.offset[count + 1] + 1);
1323                    if (strsrch.getIndex() != search.offset[count + 1] + 1) {
1324                        errln("Error setting offset");
1325                        return;
1326                    }
1327                }
1328
1329                count += 2;
1330            }
1331            strsrch.next();
1332            if (strsrch.getMatchStart() != StringSearch.DONE) {
1333                errln("Text: " + text);
1334                errln("Pattern: %s" + strsrch.getPattern());
1335                errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength());
1336                return;
1337            }
1338        }
1339        strsrch.getCollator().setStrength(TERTIARY);
1340        strsrch.getCollator().setDecomposition(Collator.NO_DECOMPOSITION);
1341    }
1342
1343    @Test
1344    public void TestIgnorable() {
1345        String rules = IGNORABLERULE;
1346        int        count  = 0;
1347        RuleBasedCollator collator = null;
1348        try {
1349            collator = new RuleBasedCollator(rules);
1350            collator.setStrength(IGNORABLE[count].strength);
1351            collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1352        } catch (Exception e) {
1353            errln("Error opening collator ");
1354            return;
1355        }
1356        String pattern = "pattern";
1357        String text = "text";
1358        StringSearch strsrch = null;
1359        try {
1360            strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1361        } catch (Exception e) {
1362            errln("Error opening string search ");
1363            return;
1364        }
1365
1366        for (; count < IGNORABLE.length; count++) {
1367            text = IGNORABLE[count].text;
1368            pattern = IGNORABLE[count].pattern;
1369            strsrch.setTarget(new StringCharacterIterator(text));
1370            strsrch.setPattern(pattern);
1371            if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) {
1372                errln("Error at test number " + count);
1373            }
1374        }
1375    }
1376
1377    @Test
1378    public void TestInitialization() {
1379        String  pattern;
1380        String  text;
1381        String  temp = "a";
1382        StringSearch  result;
1383
1384        /* simple test on the pattern ce construction */
1385        pattern = temp + temp;
1386        text = temp + temp + temp;
1387        try {
1388            result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1389        } catch (Exception e) {
1390            errln("Error opening search ");
1391            return;
1392        }
1393
1394        /* testing if an extremely large pattern will fail the initialization */
1395        pattern = "";
1396        for (int count = 0; count < 512; count ++) {
1397            pattern += temp;
1398        }
1399        try {
1400            result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1401            logln("pattern:" + result.getPattern());
1402        } catch (Exception e) {
1403            errln("Fail: an extremely large pattern will fail the initialization");
1404            return;
1405        }
1406    }
1407
1408    @Test
1409    public void TestNormCanonical() {
1410        m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1411        for (int count = 0; count < NORMCANONICAL.length; count++) {
1412            if (!assertCanonicalEqual(NORMCANONICAL[count])) {
1413                errln("Error at test number " + count);
1414            }
1415        }
1416        m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
1417    }
1418
1419    @Test
1420    public void TestNormExact() {
1421        int count;
1422
1423        m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1424        for (count = 0; count < BASIC.length; count++) {
1425            if (!assertEqual(BASIC[count])) {
1426                errln("Error at test number " + count);
1427            }
1428        }
1429        for (count = 0; count < NORMEXACT.length; count++) {
1430            if (!assertEqual(NORMEXACT[count])) {
1431                errln("Error at test number " + count);
1432            }
1433        }
1434        m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION);
1435        for (count = 0; count < NONNORMEXACT.length; count++) {
1436            if (!assertEqual(NONNORMEXACT[count])) {
1437                errln("Error at test number " + count);
1438            }
1439        }
1440    }
1441
1442    @Test
1443    public void TestOpenClose() {
1444        StringSearch            result;
1445        BreakIterator           breakiter = m_en_wordbreaker_;
1446        String           pattern = "";
1447        String           text = "";
1448        String           temp  = "a";
1449        StringCharacterIterator  chariter= new StringCharacterIterator(text);
1450
1451        /* testing null arguments */
1452        try {
1453            result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1454            errln("Error: null arguments should produce an error");
1455        } catch (Exception e) {
1456            logln("PASS: null arguments failed as expected");
1457        }
1458
1459        chariter.setText(text);
1460        try {
1461            result = new StringSearch(pattern, chariter, null, null);
1462            errln("Error: null arguments should produce an error");
1463        } catch (Exception e) {
1464            logln("PASS: null arguments failed as expected");
1465        }
1466
1467        text  = String.valueOf(0x1);
1468        try {
1469            result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1470            errln("Error: Empty pattern should produce an error");
1471        } catch (Exception e) {
1472            logln("PASS: Empty pattern failed as expected");
1473        }
1474
1475        chariter.setText(text);
1476        try {
1477            result = new StringSearch(pattern, chariter, null, null);
1478            errln("Error: Empty pattern should produce an error");
1479        } catch (Exception e) {
1480            logln("PASS: Empty pattern failed as expected");
1481        }
1482
1483        text = "";
1484        pattern =temp;
1485        try {
1486            result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1487            errln("Error: Empty text should produce an error");
1488        } catch (Exception e) {
1489            logln("PASS: Empty text failed as expected");
1490        }
1491
1492        chariter.setText(text);
1493        try {
1494            result = new StringSearch(pattern, chariter, null, null);
1495            errln("Error: Empty text should produce an error");
1496        } catch (Exception e) {
1497            logln("PASS: Empty text failed as expected");
1498        }
1499
1500        text += temp;
1501        try {
1502            result = new StringSearch(pattern, new StringCharacterIterator(text), null, null);
1503            errln("Error: null arguments should produce an error");
1504        } catch (Exception e) {
1505            logln("PASS: null arguments failed as expected");
1506        }
1507
1508        chariter.setText(text);
1509        try {
1510            result = new StringSearch(pattern, chariter, null, null);
1511            errln("Error: null arguments should produce an error");
1512        } catch (Exception e) {
1513            logln("PASS: null arguments failed as expected");
1514        }
1515
1516        try {
1517            result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null);
1518        } catch (Exception e) {
1519            errln("Error: null break iterator is valid for opening search");
1520        }
1521
1522        try {
1523            result = new StringSearch(pattern, chariter, m_en_us_, null);
1524        } catch (Exception e) {
1525            errln("Error: null break iterator is valid for opening search");
1526        }
1527
1528        try {
1529            result = new StringSearch(pattern, new StringCharacterIterator(text), Locale.ENGLISH);
1530        } catch (Exception e) {
1531            errln("Error: null break iterator is valid for opening search");
1532        }
1533
1534        try {
1535            result = new StringSearch(pattern, chariter, Locale.ENGLISH);
1536        } catch (Exception e) {
1537            errln("Error: null break iterator is valid for opening search");
1538        }
1539
1540        try {
1541            result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, breakiter);
1542        } catch (Exception e) {
1543            errln("Error: Break iterator is valid for opening search");
1544        }
1545
1546        try {
1547            result = new StringSearch(pattern, chariter, m_en_us_, null);
1548            logln("pattern:" + result.getPattern());
1549        } catch (Exception e) {
1550            errln("Error: Break iterator is valid for opening search");
1551        }
1552    }
1553
1554    @Test
1555    public void TestOverlap() {
1556        int count;
1557
1558        for (count = 0; count < OVERLAP.length; count++) {
1559            if (!assertEqualWithAttribute(OVERLAP[count], false, true)) {
1560                errln("Error at overlap test number " + count);
1561            }
1562        }
1563
1564        for (count = 0; count < NONOVERLAP.length; count++) {
1565            if (!assertEqual(NONOVERLAP[count])) {
1566                errln("Error at non overlap test number " + count);
1567            }
1568        }
1569
1570        for (count = 0; count < OVERLAP.length && count < NONOVERLAP.length; count++) {
1571            SearchData search = (OVERLAP[count]);
1572            String text = search.text;
1573            String pattern = search.pattern;
1574
1575            RuleBasedCollator collator = getCollator(search.collator);
1576            StringSearch strsrch = null;
1577            try {
1578                strsrch  = new StringSearch(pattern, new StringCharacterIterator(text), collator, null);
1579            } catch (Exception e) {
1580                errln("error open StringSearch");
1581                return;
1582            }
1583
1584            strsrch.setOverlapping(true);
1585            if (!strsrch.isOverlapping()) {
1586                errln("Error setting overlap option");
1587            }
1588            if (!assertEqualWithStringSearch(strsrch, search)) {
1589                return;
1590            }
1591
1592            search = NONOVERLAP[count];
1593            strsrch.setOverlapping(false);
1594            if (strsrch.isOverlapping()) {
1595                errln("Error setting overlap option");
1596            }
1597            strsrch.reset();
1598            if (!assertEqualWithStringSearch(strsrch, search)) {
1599                errln("Error at test number " + count);
1600             }
1601        }
1602    }
1603
1604    @Test
1605    public void TestOverlapCanonical() {
1606        int count;
1607
1608        for (count = 0; count < OVERLAPCANONICAL.length; count++) {
1609            if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true, true)) {
1610                errln("Error at overlap test number %d" + count);
1611            }
1612        }
1613
1614        for (count = 0; count < NONOVERLAP.length; count++) {
1615            if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) {
1616                errln("Error at non overlap test number %d" + count);
1617            }
1618        }
1619
1620        for (count = 0; count < OVERLAPCANONICAL.length && count < NONOVERLAPCANONICAL.length; count++) {
1621            SearchData search = OVERLAPCANONICAL[count];
1622            RuleBasedCollator collator = getCollator(search.collator);
1623            StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), collator, null);
1624            strsrch.setCanonical(true);
1625            strsrch.setOverlapping(true);
1626            if (strsrch.isOverlapping() != true) {
1627                errln("Error setting overlap option");
1628            }
1629            if (!assertEqualWithStringSearch(strsrch, search)) {
1630                strsrch = null;
1631                return;
1632            }
1633            search = NONOVERLAPCANONICAL[count];
1634            strsrch.setOverlapping(false);
1635            if (strsrch.isOverlapping() != false) {
1636                errln("Error setting overlap option");
1637            }
1638            strsrch.reset();
1639            if (!assertEqualWithStringSearch(strsrch, search)) {
1640                strsrch = null;
1641                errln("Error at test number %d" + count);
1642             }
1643        }
1644    }
1645
1646    @Test
1647    public void TestPattern() {
1648        m_en_us_.setStrength(PATTERN[0].strength);
1649        StringSearch strsrch = new StringSearch(PATTERN[0].pattern, new StringCharacterIterator(PATTERN[0].text), m_en_us_, null);
1650
1651        if (strsrch.getPattern() != PATTERN[0].pattern) {
1652            errln("Error setting pattern");
1653        }
1654        if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
1655            m_en_us_.setStrength(TERTIARY);
1656            if (strsrch != null) {
1657                strsrch = null;
1658            }
1659            return;
1660        }
1661
1662        strsrch.setPattern(PATTERN[1].pattern);
1663        if (PATTERN[1].pattern != strsrch.getPattern()) {
1664            errln("Error setting pattern");
1665            m_en_us_.setStrength(TERTIARY);
1666            if (strsrch != null) {
1667                strsrch = null;
1668            }
1669            return;
1670        }
1671        strsrch.reset();
1672
1673        if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) {
1674            m_en_us_.setStrength(TERTIARY);
1675            if (strsrch != null) {
1676                strsrch = null;
1677            }
1678            return;
1679        }
1680
1681        strsrch.setPattern(PATTERN[0].pattern);
1682        if (PATTERN[0].pattern != strsrch.getPattern()) {
1683            errln("Error setting pattern");
1684            m_en_us_.setStrength(TERTIARY);
1685            if (strsrch != null) {
1686                strsrch = null;
1687            }
1688            return;
1689        }
1690            strsrch.reset();
1691
1692        if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) {
1693            m_en_us_.setStrength(TERTIARY);
1694            if (strsrch != null) {
1695                strsrch = null;
1696            }
1697            return;
1698        }
1699        /* enormous pattern size to see if this crashes */
1700        String pattern = "";
1701        for (int templength = 0; templength != 512; templength ++) {
1702            pattern += 0x61;
1703        }
1704        try{
1705            strsrch.setPattern(pattern);
1706        }catch(Exception e) {
1707            errln("Error setting pattern with size 512");
1708        }
1709
1710        m_en_us_.setStrength(TERTIARY);
1711        if (strsrch != null) {
1712            strsrch = null;
1713        }
1714    }
1715
1716    @Test
1717    public void TestPatternCanonical() {
1718        //StringCharacterIterator text = new StringCharacterIterator(PATTERNCANONICAL[0].text);
1719        m_en_us_.setStrength(PATTERNCANONICAL[0].strength);
1720        StringSearch strsrch = new StringSearch(PATTERNCANONICAL[0].pattern, new StringCharacterIterator(PATTERNCANONICAL[0].text),
1721                                                m_en_us_, null);
1722        strsrch.setCanonical(true);
1723
1724        if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
1725            errln("Error setting pattern");
1726        }
1727        if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
1728            m_en_us_.setStrength(TERTIARY);
1729            strsrch = null;
1730            return;
1731        }
1732
1733        strsrch.setPattern(PATTERNCANONICAL[1].pattern);
1734        if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) {
1735            errln("Error setting pattern");
1736            m_en_us_.setStrength(TERTIARY);
1737            strsrch = null;
1738            return;
1739        }
1740        strsrch.reset();
1741        strsrch.setCanonical(true);
1742
1743        if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) {
1744            m_en_us_.setStrength(TERTIARY);
1745            strsrch = null;
1746            return;
1747        }
1748
1749        strsrch.setPattern(PATTERNCANONICAL[0].pattern);
1750        if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) {
1751            errln("Error setting pattern");
1752            m_en_us_.setStrength(TERTIARY);
1753            strsrch = null;
1754            return;
1755        }
1756
1757        strsrch.reset();
1758        strsrch.setCanonical(true);
1759        if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) {
1760            m_en_us_.setStrength(TERTIARY);
1761            strsrch = null;
1762            return;
1763        }
1764    }
1765
1766    @Test
1767    public void TestReset() {
1768        StringCharacterIterator text = new StringCharacterIterator("fish fish");
1769        String pattern = "s";
1770
1771        StringSearch  strsrch = new StringSearch(pattern, text, m_en_us_, null);
1772        strsrch.setOverlapping(true);
1773        strsrch.setCanonical(true);
1774        strsrch.setIndex(9);
1775        strsrch.reset();
1776        if (strsrch.isCanonical() || strsrch.isOverlapping() ||
1777            strsrch.getIndex() != 0 || strsrch.getMatchLength() != 0 ||
1778            strsrch.getMatchStart() != SearchIterator.DONE) {
1779                errln("Error resetting string search");
1780        }
1781
1782        strsrch.previous();
1783        if (strsrch.getMatchStart() != 7 || strsrch.getMatchLength() != 1) {
1784            errln("Error resetting string search\n");
1785        }
1786    }
1787
1788    @Test
1789    public void TestSetMatch() {
1790        for (int count = 0; count < MATCH.length; count++) {
1791            SearchData     search = MATCH[count];
1792            StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text),
1793                                                    m_en_us_, null);
1794
1795            int size = 0;
1796            while (search.offset[size] != -1) {
1797                size ++;
1798            }
1799
1800            if (strsrch.first() != search.offset[0]) {
1801                errln("Error getting first match");
1802            }
1803            if (strsrch.last() != search.offset[size -1]) {
1804                errln("Error getting last match");
1805            }
1806
1807            int index = 0;
1808            while (index < size) {
1809                if (index + 2 < size) {
1810                    if (strsrch.following(search.offset[index + 2] - 1) != search.offset[index + 2]) {
1811                        errln("Error getting following match at index " + (search.offset[index + 2]-1));
1812                    }
1813                }
1814                if (index + 1 < size) {
1815                    if (strsrch.preceding(search.offset[index + 1] + search.size[index + 1] + 1) != search.offset[index + 1]) {
1816                        errln("Error getting preceeding match at index " + (search.offset[index + 1] + 1));
1817                    }
1818                }
1819                index += 2;
1820            }
1821
1822            if (strsrch.following(search.text.length()) != SearchIterator.DONE) {
1823                errln("Error expecting out of bounds match");
1824            }
1825            if (strsrch.preceding(0) != SearchIterator.DONE) {
1826                errln("Error expecting out of bounds match");
1827            }
1828        }
1829    }
1830
1831    @Test
1832    public void TestStrength() {
1833        for (int count = 0; count < STRENGTH.length; count++) {
1834            if (!assertEqual(STRENGTH[count])) {
1835                errln("Error at test number " + count);
1836            }
1837        }
1838    }
1839
1840    @Test
1841    public void TestStrengthCanonical() {
1842        for (int count = 0; count < STRENGTHCANONICAL.length; count++) {
1843            if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) {
1844                errln("Error at test number" + count);
1845            }
1846        }
1847    }
1848
1849    @Test
1850    public void TestSupplementary() {
1851        for (int count = 0; count < SUPPLEMENTARY.length; count++) {
1852            if (!assertEqual(SUPPLEMENTARY[count])) {
1853                errln("Error at test number " + count);
1854            }
1855        }
1856    }
1857
1858    @Test
1859    public void TestSupplementaryCanonical() {
1860        for (int count = 0; count < SUPPLEMENTARYCANONICAL.length; count++) {
1861            if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) {
1862                errln("Error at test number" + count);
1863            }
1864        }
1865    }
1866
1867    @Test
1868    public void TestText() {
1869        SearchData TEXT[] = {
1870            SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)),
1871            SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3))
1872        };
1873        StringCharacterIterator t = new StringCharacterIterator(TEXT[0].text);
1874        StringSearch strsrch = new StringSearch(TEXT[0].pattern, t, m_en_us_, null);
1875
1876        if (!t.equals(strsrch.getTarget())) {
1877            errln("Error setting text");
1878        }
1879        if (!assertEqualWithStringSearch(strsrch, TEXT[0])) {
1880            errln("Error at assertEqualWithStringSearch");
1881            return;
1882        }
1883
1884        t = new StringCharacterIterator(TEXT[1].text);
1885        strsrch.setTarget(t);
1886        if (!t.equals(strsrch.getTarget())) {
1887            errln("Error setting text");
1888            return;
1889        }
1890
1891        if (!assertEqualWithStringSearch(strsrch, TEXT[1])) {
1892            errln("Error at assertEqualWithStringSearch");
1893            return;
1894        }
1895    }
1896
1897    @Test
1898    public void TestTextCanonical() {
1899        StringCharacterIterator t = new StringCharacterIterator(TEXTCANONICAL[0].text);
1900        StringSearch strsrch = new StringSearch(TEXTCANONICAL[0].pattern, t, m_en_us_, null);
1901        strsrch.setCanonical(true);
1902
1903        if (!t.equals(strsrch.getTarget())) {
1904            errln("Error setting text");
1905        }
1906        if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
1907            strsrch = null;
1908            return;
1909        }
1910
1911        t = new StringCharacterIterator(TEXTCANONICAL[1].text);
1912        strsrch.setTarget(t);
1913        if (!t.equals(strsrch.getTarget())) {
1914            errln("Error setting text");
1915            strsrch = null;
1916            return;
1917        }
1918
1919        if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[1])) {
1920            strsrch = null;
1921            return;
1922        }
1923
1924        t = new StringCharacterIterator(TEXTCANONICAL[0].text);
1925        strsrch.setTarget(t);
1926        if (!t.equals(strsrch.getTarget())) {
1927            errln("Error setting text");
1928            strsrch = null;
1929            return;
1930        }
1931
1932        if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) {
1933            errln("Error at assertEqualWithStringSearch");
1934            strsrch = null;
1935            return;
1936        }
1937    }
1938
1939    @Test
1940    public void TestVariable() {
1941        m_en_us_.setAlternateHandlingShifted(true);
1942        for (int count = 0; count < VARIABLE.length; count++) {
1943            // logln("variable" + count);
1944            if (!assertEqual(VARIABLE[count])) {
1945                errln("Error at test number " + count);
1946            }
1947        }
1948        m_en_us_.setAlternateHandlingShifted(false);
1949    }
1950
1951    @Test
1952    public void TestVariableCanonical() {
1953        m_en_us_.setAlternateHandlingShifted(true);
1954        for (int count = 0; count < VARIABLE.length; count++) {
1955            // logln("variable " + count);
1956            if (!assertCanonicalEqual(VARIABLE[count])) {
1957                errln("Error at test number " + count);
1958            }
1959        }
1960        m_en_us_.setAlternateHandlingShifted(false);
1961    }
1962
1963    @Test
1964    public void TestSubClass()
1965    {
1966        class TestSearch extends SearchIterator
1967        {
1968            String pattern;
1969            String text;
1970
1971            TestSearch(StringCharacterIterator target, BreakIterator breaker,
1972                       String pattern)
1973            {
1974                super(target, breaker);
1975                this.pattern = pattern;
1976                StringBuffer buffer = new StringBuffer();
1977                while (targetText.getIndex() != targetText.getEndIndex()) {
1978                    buffer.append(targetText.current());
1979                    targetText.next();
1980                }
1981                text = buffer.toString();
1982                targetText.setIndex(targetText.getBeginIndex());
1983            }
1984            @Override
1985            protected int handleNext(int start)
1986            {
1987                int match = text.indexOf(pattern, start);
1988                if (match < 0) {
1989                    targetText.last();
1990                    return DONE;
1991                }
1992                targetText.setIndex(match);
1993                setMatchLength(pattern.length());
1994                return match;
1995            }
1996            @Override
1997            protected int handlePrevious(int start)
1998            {
1999                int match = text.lastIndexOf(pattern, start - 1);
2000                if (match < 0) {
2001                    targetText.setIndex(0);
2002                    return DONE;
2003                }
2004                targetText.setIndex(match);
2005                setMatchLength(pattern.length());
2006                return match;
2007            }
2008
2009            @Override
2010            public int getIndex()
2011            {
2012                int result = targetText.getIndex();
2013                if (result < 0 || result >= text.length()) {
2014                    return DONE;
2015                }
2016                return result;
2017            }
2018        }
2019
2020        TestSearch search = new TestSearch(
2021                            new StringCharacterIterator("abc abcd abc"),
2022                            null, "abc");
2023        int expected[] = {0, 4, 9};
2024        for (int i = 0; i < expected.length; i ++) {
2025            if (search.next() != expected[i]) {
2026                errln("Error getting next match");
2027            }
2028            if (search.getMatchLength() != search.pattern.length()) {
2029                errln("Error getting next match length");
2030            }
2031        }
2032        if (search.next() != SearchIterator.DONE) {
2033            errln("Error should have reached the end of the iteration");
2034        }
2035        for (int i = expected.length - 1; i >= 0; i --) {
2036            if (search.previous() != expected[i]) {
2037                errln("Error getting next match");
2038            }
2039            if (search.getMatchLength() != search.pattern.length()) {
2040                errln("Error getting next match length");
2041            }
2042        }
2043        if (search.previous() != SearchIterator.DONE) {
2044            errln("Error should have reached the start of the iteration");
2045        }
2046    }
2047
2048    //Test for ticket 5024
2049    @Test
2050    public void TestDiactricMatch() {
2051        String pattern = "pattern";
2052        String text = "text";
2053        StringSearch strsrch = null;
2054        try {
2055            strsrch = new StringSearch(pattern, text);
2056        } catch (Exception e) {
2057            errln("Error opening string search ");
2058            return;
2059        }
2060
2061        for (int count = 0; count < DIACTRICMATCH.length; count++) {
2062            strsrch.setCollator(getCollator(DIACTRICMATCH[count].collator));
2063            strsrch.getCollator().setStrength(DIACTRICMATCH[count].strength);
2064            strsrch.setBreakIterator(getBreakIterator(DIACTRICMATCH[count].breaker));
2065            strsrch.reset();
2066            text = DIACTRICMATCH[count].text;
2067            pattern = DIACTRICMATCH[count].pattern;
2068            strsrch.setTarget(new StringCharacterIterator(text));
2069            strsrch.setPattern(pattern);
2070            if (!assertEqualWithStringSearch(strsrch, DIACTRICMATCH[count])) {
2071                errln("Error at test number " + count);
2072            }
2073        }
2074    }
2075
2076    @Test
2077    public void TestUsingSearchCollator() {
2078        String scKoText =
2079            " " +
2080    /*01*/  "\uAC00 " +                   // simple LV Hangul
2081    /*03*/  "\uAC01 " +                   // simple LVT Hangul
2082    /*05*/  "\uAC0F " +                   // LVTT, last jamo expands for search
2083    /*07*/  "\uAFFF " +                   // LLVVVTT, every jamo expands for search
2084    /*09*/  "\u1100\u1161\u11A8 " +       // 0xAC01 as conjoining jamo
2085    /*13*/  "\u1100\u1161\u1100 " +       // 0xAC01 as basic conjoining jamo (per search rules)
2086    /*17*/  "\u3131\u314F\u3131 " +       // 0xAC01 as compatibility jamo
2087    /*21*/  "\u1100\u1161\u11B6 " +       // 0xAC0F as conjoining jamo; last expands for search
2088    /*25*/  "\u1100\u1161\u1105\u1112 " + // 0xAC0F as basic conjoining jamo; last expands for search
2089    /*30*/  "\u1101\u1170\u11B6 " +       // 0xAFFF as conjoining jamo; all expand for search
2090    /*34*/  "\u00E6 " +                   // small letter ae, expands
2091    /*36*/  "\u1E4D " +                   // small letter o with tilde and acute, decomposes
2092            "";
2093
2094        String scKoPat0 = "\uAC01";
2095        String scKoPat1 = "\u1100\u1161\u11A8"; // 0xAC01 as conjoining jamo
2096        String scKoPat2 = "\uAC0F";
2097        String scKoPat3 = "\u1100\u1161\u1105\u1112"; // 0xAC0F as basic conjoining jamo
2098        String scKoPat4 = "\uAFFF";
2099        String scKoPat5 = "\u1101\u1170\u11B6"; // 0xAFFF as conjoining jamo
2100
2101        int[] scKoSrchOff01 = { 3,  9, 13 };
2102        int[] scKoSrchOff23 = { 5, 21, 25 };
2103        int[] scKoSrchOff45 = { 7, 30     };
2104
2105        int[] scKoStndOff01 = { 3,  9 };
2106        int[] scKoStndOff2  = { 5, 21 };
2107        int[] scKoStndOff3  = { 25    };
2108        int[] scKoStndOff45 = { 7, 30 };
2109
2110        class PatternAndOffsets {
2111            private String pattern;
2112            private int[] offsets;
2113            PatternAndOffsets(String pat, int[] offs) {
2114                pattern = pat;
2115                offsets = offs;
2116            }
2117            public String getPattern() { return pattern; }
2118            public int[] getOffsets() { return offsets; }
2119        }
2120        final PatternAndOffsets[] scKoSrchPatternsOffsets = {
2121            new PatternAndOffsets( scKoPat0, scKoSrchOff01 ),
2122            new PatternAndOffsets( scKoPat1, scKoSrchOff01 ),
2123            new PatternAndOffsets( scKoPat2, scKoSrchOff23 ),
2124            new PatternAndOffsets( scKoPat3, scKoSrchOff23 ),
2125            new PatternAndOffsets( scKoPat4, scKoSrchOff45 ),
2126            new PatternAndOffsets( scKoPat5, scKoSrchOff45 ),
2127        };
2128        final PatternAndOffsets[] scKoStndPatternsOffsets = {
2129            new PatternAndOffsets( scKoPat0, scKoStndOff01 ),
2130            new PatternAndOffsets( scKoPat1, scKoStndOff01 ),
2131            new PatternAndOffsets( scKoPat2, scKoStndOff2  ),
2132            new PatternAndOffsets( scKoPat3, scKoStndOff3  ),
2133            new PatternAndOffsets( scKoPat4, scKoStndOff45 ),
2134            new PatternAndOffsets( scKoPat5, scKoStndOff45 ),
2135        };
2136
2137        class TUSCItem {
2138            private String localeString;
2139            private String text;
2140            private PatternAndOffsets[] patternsAndOffsets;
2141            TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) {
2142                localeString = locStr;
2143                text = txt;
2144                patternsAndOffsets = patsAndOffs;
2145            }
2146            public String getLocaleString() { return localeString; }
2147            public String getText() { return text; }
2148            public PatternAndOffsets[] getPatternsAndOffsets() { return patternsAndOffsets; }
2149        }
2150        final TUSCItem[] tuscItems = {
2151            new TUSCItem( "root",                  scKoText, scKoStndPatternsOffsets ),
2152            new TUSCItem( "root@collation=search", scKoText, scKoSrchPatternsOffsets ),
2153            new TUSCItem( "ko@collation=search",   scKoText, scKoSrchPatternsOffsets ),
2154        };
2155
2156        String dummyPat = "a";
2157
2158        for (TUSCItem tuscItem: tuscItems) {
2159            String localeString = tuscItem.getLocaleString();
2160            ULocale uloc = new ULocale(localeString);
2161            RuleBasedCollator col = null;
2162            try {
2163                col = (RuleBasedCollator)Collator.getInstance(uloc);
2164            } catch (Exception e) {
2165                errln("Error: in locale " + localeString + ", err in Collator.getInstance");
2166                continue;
2167            }
2168            StringCharacterIterator ci = new StringCharacterIterator(tuscItem.getText());
2169            StringSearch srch = new StringSearch(dummyPat, ci, col);
2170            for ( PatternAndOffsets patternAndOffsets: tuscItem.getPatternsAndOffsets() ) {
2171                srch.setPattern(patternAndOffsets.getPattern());
2172                int[] offsets = patternAndOffsets.getOffsets();
2173                int ioff, noff = offsets.length;
2174                int offset;
2175
2176                srch.reset();
2177                ioff = 0;
2178                while (true) {
2179                    offset = srch.next();
2180                    if (offset == SearchIterator.DONE) {
2181                        break;
2182                    }
2183                    if ( ioff < noff ) {
2184                        if ( offset != offsets[ioff] ) {
2185                            errln("Error: in locale " + localeString + ", expected SearchIterator.next() " + offsets[ioff] + ", got " + offset);
2186                            //ioff = noff;
2187                            //break;
2188                        }
2189                        ioff++;
2190                    } else {
2191                        errln("Error: in locale " + localeString + ", SearchIterator.next() returned more matches than expected");
2192                    }
2193                }
2194                if ( ioff < noff ) {
2195                    errln("Error: in locale " + localeString + ", SearchIterator.next() returned fewer matches than expected");
2196                }
2197
2198                srch.reset();
2199                ioff = noff;
2200                while (true) {
2201                    offset = srch.previous();
2202                    if (offset == SearchIterator.DONE) {
2203                        break;
2204                    }
2205                    if ( ioff > 0 ) {
2206                        ioff--;
2207                        if ( offset != offsets[ioff] ) {
2208                             errln("Error: in locale " + localeString + ", expected SearchIterator.previous() " + offsets[ioff] + ", got " + offset);
2209                            //ioff = 0;
2210                            // break;
2211                        }
2212                    } else {
2213                        errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned more matches than expected");
2214                    }
2215                }
2216                if ( ioff > 0 ) {
2217                    errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned fewer matches than expected");
2218                }
2219            }
2220        }
2221    }
2222
2223    @Test
2224    public void TestIndicPrefixMatch() {
2225        for (int count = 0; count < INDICPREFIXMATCH.length; count++) {
2226            if (!assertEqual(INDICPREFIXMATCH[count])) {
2227                errln("Error at test number" + count);
2228            }
2229        }
2230    }
2231
2232
2233    // Test case for ticket#12555
2234    @Test
2235    public void TestLongPattern() {
2236        StringBuilder pattern = new StringBuilder();
2237        for (int i = 0; i < 255; i++) {
2238            pattern.append('a');
2239        }
2240        // appends a character producing multiple ce32 at
2241        // index 256.
2242        pattern.append('á');
2243
2244        CharacterIterator target = new StringCharacterIterator("not important");
2245        try {
2246            StringSearch ss = new StringSearch(pattern.toString(), target, Locale.ENGLISH);
2247            assertNotNull("Non-null StringSearch instance", ss);
2248        } catch (Exception e) {
2249            errln("Error initializing a new StringSearch object");
2250        }
2251    }
2252}
2253