1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/* 4 ******************************************************************************* 5 * Copyright (C) 2000-2015, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 10/** 11 * Port From: ICU4C v2.1 : collate/StringSearchTest 12 * Source File: $ICU4CRoot/source/test/intltest/srchtest.cpp 13 **/ 14 15package com.ibm.icu.dev.test.search; 16 17import static com.ibm.icu.text.Collator.IDENTICAL; 18import static com.ibm.icu.text.Collator.PRIMARY; 19import static com.ibm.icu.text.Collator.QUATERNARY; 20import static com.ibm.icu.text.Collator.SECONDARY; 21import static com.ibm.icu.text.Collator.TERTIARY; 22import static com.ibm.icu.text.SearchIterator.ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD; 23import static com.ibm.icu.text.SearchIterator.ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD; 24import static com.ibm.icu.text.SearchIterator.ElementComparisonType.STANDARD_ELEMENT_COMPARISON; 25 26import java.text.CharacterIterator; 27import java.text.StringCharacterIterator; 28import java.util.Locale; 29 30import org.junit.Before; 31import org.junit.Test; 32import org.junit.runner.RunWith; 33import org.junit.runners.JUnit4; 34 35import com.ibm.icu.dev.test.TestFmwk; 36import com.ibm.icu.text.BreakIterator; 37import com.ibm.icu.text.Collator; 38import com.ibm.icu.text.RuleBasedCollator; 39import com.ibm.icu.text.SearchIterator; 40import com.ibm.icu.text.SearchIterator.ElementComparisonType; 41import com.ibm.icu.text.StringSearch; 42import com.ibm.icu.util.ULocale; 43 44@RunWith(JUnit4.class) 45public class SearchTest extends TestFmwk { 46 47 //inner class 48 static class SearchData { 49 SearchData(String text, String pattern, 50 String coll, int strength, ElementComparisonType cmpType, String breaker, 51 int[] offset, int[] size) { 52 this.text = text; 53 this.pattern = pattern; 54 this.collator = coll; 55 this.strength = strength; 56 this.cmpType = cmpType; 57 this.breaker = breaker; 58 this.offset = offset; 59 this.size = size; 60 } 61 String text; 62 String pattern; 63 String collator; 64 int strength; 65 ElementComparisonType cmpType; 66 String breaker; 67 int[] offset; 68 int[] size; 69 } 70 71 RuleBasedCollator m_en_us_; 72 RuleBasedCollator m_fr_fr_; 73 RuleBasedCollator m_de_; 74 RuleBasedCollator m_es_; 75 BreakIterator m_en_wordbreaker_; 76 BreakIterator m_en_characterbreaker_; 77 78 // Just calling SearchData constructor, to make the test data source code 79 // nice and short 80 private static SearchData SD(String text, String pattern, String coll, int strength, 81 ElementComparisonType cmpType, String breaker, int[] offset, int[] size) { 82 return new SearchData(text, pattern, coll, strength, cmpType, breaker, offset, size); 83 } 84 85 // Just returning int[], to make the test data nice and short 86 private static int[] IA(int... elements) { 87 return elements; 88 } 89 90 static SearchData[] BASIC = { 91 SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 92 SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)), 93 SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)), 94 SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)), 95 SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)), 96 SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), 97 SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)), 98 SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 99 SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 100 SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 101 SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 102 SD("\u00c9", "e", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 103 }; 104 105 SearchData BREAKITERATOREXACT[] = { 106 SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)), 107 SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)), 108 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)), 109 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)), 110 SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)), 111 /* jitterbug 1745 */ 112 SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)), 113 SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)), 114 SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)), 115 }; 116 117 SearchData BREAKITERATORCANONICAL[] = { 118 SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)), 119 SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)), 120 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)), 121 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)), 122 SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)), 123 /* jitterbug 1745 */ 124 SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)), 125 SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)), 126 SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)), 127 }; 128 129 SearchData BASICCANONICAL[] = { 130 SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 131 SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)), 132 SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)), 133 SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)), 134 SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)), 135 SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), 136 SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)), 137 138 SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 139 SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 140 SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 141 SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 142 SD("a\u0300\u0325b", "\u0300b", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 143 SD("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 144 SD("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 145 SD("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 146 147 SD("\u00c4\u0323", "A\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), 148 SD("\u0308\u0323", "\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), 149 }; 150 151 SearchData COLLATOR[] = { 152 /* english */ 153 SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 154 /* tailored */ 155 SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)), 156 }; 157 158 String TESTCOLLATORRULE = "& o,O ; p,P"; 159 String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; \u00fc & UE ; \u00dc"; 160 161 SearchData COLLATORCANONICAL[] = { 162 /* english */ 163 SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 164 /* tailored */ 165 SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)), 166 }; 167 168 SearchData COMPOSITEBOUNDARIES[] = { 169 SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 170 SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 171 SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), 172 SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 173 SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 174 SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 175 176 /* first one matches only because it's at the start of the text */ 177 SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 178 179 /* \\u0300 blocked by \\u0300 */ 180 SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 181 182 /* A + 030A + 0301 */ 183 SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 184 SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 185 186 SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 187 SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 188 189 SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 190 191 SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 192 193 /* blocked accent */ 194 SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 195 SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 196 197 SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 198 SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 199 SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 200 201 SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 202 203 SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 204 SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 205 206 SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 207 208 SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 209 SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 210 SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)), 211 }; 212 213 SearchData COMPOSITEBOUNDARIESCANONICAL[] = { 214 SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 215 SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 216 SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), 217 SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 218 SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 219 SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 220 221 /* first one matches only because it's at the start of the text */ 222 SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 223 224 /* \u0300 blocked by \u0300 */ 225 SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 226 227 /* A + 030A + 0301 */ 228 SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 229 SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 230 231 SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 232 SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 233 234 SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 235 236 SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 237 238 /* blocked accent */ 239 SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 240 SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 241 242 SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 243 SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 244 SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 245 246 SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 247 248 SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 249 SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 250 251 SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), 252 253 SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 254 SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 255 256 SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)), 257 }; 258 259 SearchData SUPPLEMENTARY[] = { 260 SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", 261 "\uD800\uDC00", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)), 262 SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)), 263 SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 264 SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 265 SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 266 SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 267 }; 268 269 String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315"; 270 271 SearchData CONTRACTION[] = { 272 /* common discontiguous */ 273 SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 274 275 SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 276 277 /* contraction prefix */ 278 SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 279 280 SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 281 SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 282 283 /* 284 * discontiguous problem here for backwards iteration. accents not found because discontiguous stores all 285 * information 286 */ 287 SD("X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 288 /* ends not with a contraction character */ 289 SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 290 SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 291 SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 292 /* blocked discontiguous */ 293 SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 294 295 /* 296 * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the 297 * match fails because it ends in the middle of an expansion... 298 */ 299 SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 300 }; 301 302 SearchData CONTRACTIONCANONICAL[] = { 303 /* common discontiguous */ 304 SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 305 SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 306 307 /* contraction prefix */ 308 SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 309 310 SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 311 SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 312 313 /* 314 * discontiguous problem here for backwards iteration. forwards gives 0, 4 but backwards give 1, 3 315 */ 316 /* 317 * {"X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, {0, -1), {4}), 318 */ 319 320 /* ends not with a contraction character */ 321 SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 322 SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 323 324 SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 325 326 /* blocked discontiguous */ 327 SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 328 329 /* 330 * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the 331 * match fails because it ends in the middle of an expansion... 332 */ 333 SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(2)), 334 }; 335 336 SearchData MATCH[] = { 337 SD("a busy bee is a very busy beeee", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, -1), IA(3, 3)), 338 /* 012345678901234567890123456789012345678901234567890 */ 339 SD("a busy bee is a very busy beeee with no bee life", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, 40, -1), IA(3, 3, 3)), 340 }; 341 342 String IGNORABLERULE = "&a = \u0300"; 343 344 SearchData IGNORABLE[] = { 345 /* 346 * This isn't much of a test when matches have to be on grapheme boundiaries. The match at 0 only works because it's 347 * at the start of the text. 348 */ 349 SD("\u0300\u0315 \u0300\u0315 ", "\u0300", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), 350 }; 351 352 SearchData DIACTRICMATCH[] = { 353 SD("\u0061\u0061\u00E1", "\u0061\u00E1", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(2)), 354 SD("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020", "\u00C2\u0303", 355 null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 4, 5, 6, 7, 10, 12, 13, 16, -1), IA(2, 1, 1, 1, 3, 2, 1, 3, 2)), 356 SD("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 5, -1), IA(4, 3)), 357 }; 358 359 SearchData NORMCANONICAL[] = { 360 SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 361 SD("\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 362 SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 363 SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 364 SD("a\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 365 SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 366 }; 367 368 SearchData NORMEXACT[] = { 369 SD("a\u0300\u0325", "a\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), 370 }; 371 372 SearchData NONNORMEXACT[] = { 373 SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 374 }; 375 376 SearchData OVERLAP[] = { 377 SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)), 378 }; 379 380 SearchData NONOVERLAP[] = { 381 SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)), 382 }; 383 384 SearchData OVERLAPCANONICAL[] = { 385 SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)), 386 }; 387 388 SearchData NONOVERLAPCANONICAL[] = { 389 SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)), 390 }; 391 392 SearchData PATTERNCANONICAL[] = { 393 SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)), 394 SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), 395 }; 396 397 SearchData PATTERN[] = { 398 SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)), 399 SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), 400 }; 401 402 String PECHE_WITH_ACCENTS = "un p\u00E9ch\u00E9, " 403 + "\u00E7a p\u00E8che par, " 404 + "p\u00E9cher, " 405 + "une p\u00EAche, " 406 + "un p\u00EAcher, " 407 + "j\u2019ai p\u00EAch\u00E9, " 408 + "un p\u00E9cheur, " 409 + "\u201Cp\u00E9che\u201D, " 410 + "decomp peche\u0301, " 411 + "base peche"; 412 // in the above, the interesting words and their offsets are: 413 // 3 pe<301>che<301> 414 // 13 pe<300>che 415 // 24 pe<301>cher 416 // 36 pe<302>che 417 // 46 pe<302>cher 418 // 59 pe<302>che<301> 419 // 69 pe<301>cheur 420 // 79 pe<301>che 421 // 94 peche<+301> 422 // 107 peche 423 424 SearchData STRENGTH[] = { 425 /* 012345678901234567890123456789012345678901234567890123456789 */ 426 SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), 427 SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)), 428 SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", 429 "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)), 430 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)), 431 SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)), 432 SD("\u00c0 should match but not A", "A\u0300", "en", IDENTICAL, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1, 0)), 433 434 /* some tests for modified element comparison, ticket #7093 */ 435 SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 436 SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 437 SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)), 438 SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 439 SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 440 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)), 441 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)), 442 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)), 443 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)), 444 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)), 445 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)), 446 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), 447 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), 448 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 449 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 450 SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), 451 SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), 452 SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 453 SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 454 455 /* more tests for modified element comparison (with fr), ticket #7093 */ 456 SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 457 SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 458 SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)), 459 SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 460 SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 461 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)), 462 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)), 463 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)), 464 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)), 465 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)), 466 SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)), 467 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), 468 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), 469 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 470 SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 471 SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), 472 SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), 473 SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), 474 SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), 475 476 }; 477 478 SearchData STRENGTHCANONICAL[] = { 479 /* 012345678901234567890123456789012345678901234567890123456789 */ 480 SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), 481 SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)), 482 SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", 483 "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)), 484 SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)), 485 SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)), 486 }; 487 488 SearchData SUPPLEMENTARYCANONICAL[] = { 489 /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ 490 SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", "\uD800\uDC00", 491 null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)), 492 SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)), 493 SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 494 SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 495 SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 496 SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), 497 }; 498 499 static SearchData VARIABLE[] = { 500 /* 012345678901234567890123456789012345678901234567890123456789 */ 501 SD("blackbirds black blackbirds blackbird black-bird", "blackbird", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 17, 28, 38, -1), IA(9, 9, 9, 10)), 502 503 /* 504 * to see that it doesn't go into an infinite loop if the start of text is a ignorable character 505 */ 506 SD(" on", "go", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 507 SD("abcdefghijklmnopqrstuvwxyz", " ", 508 null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, 509 IA(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1), 510 IA(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), 511 512 /* testing tightest match */ 513 SD(" abc a bc ab c a bc ab c", "abc", null, QUATERNARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(3)), 514 /* 012345678901234567890123456789012345678901234567890123456789 */ 515 SD(" abc a bc ab c a bc ab c", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 6, 13, 21, 31, -1), IA(3, 4, 4, 5, 5)), 516 517 /* totally ignorable text */ 518 SD(" ---------------", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), 519 }; 520 521 static SearchData TEXTCANONICAL[] = { 522 SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)), 523 SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)), 524 }; 525 526 static SearchData INDICPREFIXMATCH[] = { 527 SD("\u0915\u0020\u0915\u0901\u0020\u0915\u0902\u0020\u0915\u0903\u0020\u0915\u0940\u0020\u0915\u093F\u0020\u0915\u0943\u0020\u0915\u093C\u0020\u0958", 528 "\u0915", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 5, 8, 11, 14, 17, 20, 23,-1), IA(1, 2, 2, 2, 1, 1, 1, 2, 1)), 529 SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947", 530 "\u0915\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 3, 7, 11, -1), IA(2, 2, 2, 2)), 531 SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947", 532 "\u0915\u0943\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 19, -1), IA(3, 3)), 533 }; 534 535 /** 536 * Constructor 537 */ 538 public SearchTest() 539 { 540 541 } 542 543 @Before 544 public void init() throws Exception { 545 m_en_us_ = (RuleBasedCollator)Collator.getInstance(Locale.US); 546 m_fr_fr_ = (RuleBasedCollator)Collator.getInstance(Locale.FRANCE); 547 m_de_ = (RuleBasedCollator)Collator.getInstance(new Locale("de", "DE")); 548 m_es_ = (RuleBasedCollator)Collator.getInstance(new Locale("es", "ES")); 549 m_en_wordbreaker_ = BreakIterator.getWordInstance(); 550 m_en_characterbreaker_ = BreakIterator.getCharacterInstance(); 551 String rules = m_de_.getRules() + EXTRACOLLATIONRULE; 552 m_de_ = new RuleBasedCollator(rules); 553 rules = m_es_.getRules() + EXTRACOLLATIONRULE; 554 m_es_ = new RuleBasedCollator(rules); 555 556 } 557 558 RuleBasedCollator getCollator(String collator) { 559 if (collator == null) { 560 return m_en_us_; 561 } if (collator.equals("fr")) { 562 return m_fr_fr_; 563 } else if (collator.equals("de")) { 564 return m_de_; 565 } else if (collator.equals("es")) { 566 return m_es_; 567 } else { 568 return m_en_us_; 569 } 570 } 571 572 BreakIterator getBreakIterator(String breaker) { 573 if (breaker == null) { 574 return null; 575 } if (breaker.equals("wordbreaker")) { 576 return m_en_wordbreaker_; 577 } else { 578 return m_en_characterbreaker_; 579 } 580 } 581 582 boolean assertCanonicalEqual(SearchData search) { 583 Collator collator = getCollator(search.collator); 584 BreakIterator breaker = getBreakIterator(search.breaker); 585 StringSearch strsrch; 586 587 String text = search.text; 588 String pattern = search.pattern; 589 590 if (breaker != null) { 591 breaker.setText(text); 592 } 593 collator.setStrength(search.strength); 594 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 595 try { 596 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); 597 strsrch.setElementComparisonType(search.cmpType); 598 strsrch.setCanonical(true); 599 } catch (Exception e) { 600 errln("Error opening string search" + e.getMessage()); 601 return false; 602 } 603 604 if (!assertEqualWithStringSearch(strsrch, search)) { 605 collator.setStrength(TERTIARY); 606 collator.setDecomposition(Collator.NO_DECOMPOSITION); 607 return false; 608 } 609 collator.setStrength(TERTIARY); 610 collator.setDecomposition(Collator.NO_DECOMPOSITION); 611 return true; 612 } 613 614 boolean assertEqual(SearchData search) { 615 Collator collator = getCollator(search.collator); 616 BreakIterator breaker = getBreakIterator(search.breaker); 617 StringSearch strsrch; 618 619 String text = search.text; 620 String pattern = search.pattern; 621 622 if (breaker != null) { 623 breaker.setText(text); 624 } 625 collator.setStrength(search.strength); 626 try { 627 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); 628 strsrch.setElementComparisonType(search.cmpType); 629 } catch (Exception e) { 630 errln("Error opening string search " + e.getMessage()); 631 return false; 632 } 633 634 if (!assertEqualWithStringSearch(strsrch, search)) { 635 collator.setStrength(TERTIARY); 636 return false; 637 } 638 collator.setStrength(TERTIARY); 639 return true; 640 } 641 642 boolean assertEqualWithAttribute(SearchData search, boolean canonical, boolean overlap) { 643 Collator collator = getCollator(search.collator); 644 BreakIterator breaker = getBreakIterator(search.breaker); 645 StringSearch strsrch; 646 647 String text = search.text; 648 String pattern = search.pattern; 649 650 if (breaker != null) { 651 breaker.setText(text); 652 } 653 collator.setStrength(search.strength); 654 try { 655 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); 656 strsrch.setCanonical(canonical); 657 strsrch.setOverlapping(overlap); 658 strsrch.setElementComparisonType(search.cmpType); 659 } catch (Exception e) { 660 errln("Error opening string search " + e.getMessage()); 661 return false; 662 } 663 664 if (!assertEqualWithStringSearch(strsrch, search)) { 665 collator.setStrength(TERTIARY); 666 return false; 667 } 668 collator.setStrength(TERTIARY); 669 return true; 670 } 671 672 boolean assertEqualWithStringSearch(StringSearch strsrch, SearchData search) { 673 int count = 0; 674 int matchindex = search.offset[count]; 675 String matchtext; 676 677 if (strsrch.getMatchStart() != SearchIterator.DONE || 678 strsrch.getMatchLength() != 0) { 679 errln("Error with the initialization of match start and length"); 680 } 681 // start of following matches 682 while (matchindex >= 0) { 683 int matchlength = search.size[count]; 684 strsrch.next(); 685 //int x = strsrch.getMatchStart(); 686 if (matchindex != strsrch.getMatchStart() || 687 matchlength != strsrch.getMatchLength()) { 688 errln("Text: " + search.text); 689 errln("Searching forward for pattern: " + strsrch.getPattern()); 690 errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 691 return false; 692 } 693 count ++; 694 695 matchtext = strsrch.getMatchedText(); 696 String targetText = search.text; 697 if (matchlength > 0 && 698 targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) { 699 errln("Error getting following matched text"); 700 } 701 702 matchindex = search.offset[count]; 703 } 704 strsrch.next(); 705 if (strsrch.getMatchStart() != SearchIterator.DONE || 706 strsrch.getMatchLength() != 0) { 707 errln("Text: " + search.text); 708 errln("Searching forward for pattern: " + strsrch.getPattern()); 709 errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 710 return false; 711 } 712 // start of preceding matches 713 count = count == 0 ? 0 : count - 1; 714 matchindex = search.offset[count]; 715 while (matchindex >= 0) { 716 int matchlength = search.size[count]; 717 strsrch.previous(); 718 if (matchindex != strsrch.getMatchStart() || 719 matchlength != strsrch.getMatchLength()) { 720 errln("Text: " + search.text); 721 errln("Searching backward for pattern: " + strsrch.getPattern()); 722 errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 723 return false; 724 } 725 726 matchtext = strsrch.getMatchedText(); 727 String targetText = search.text; 728 if (matchlength > 0 && 729 targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) { 730 errln("Error getting following matched text"); 731 } 732 733 matchindex = count > 0 ? search.offset[count - 1] : -1; 734 count --; 735 } 736 strsrch.previous(); 737 if (strsrch.getMatchStart() != SearchIterator.DONE || 738 strsrch.getMatchLength() != 0) { 739 errln("Text: " + search.text); 740 errln("Searching backward for pattern: " + strsrch.getPattern()); 741 errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 742 return false; 743 } 744 return true; 745 } 746 747 @Test 748 public void TestConstructor() 749 { 750 String pattern = "pattern"; 751 String text = "text"; 752 StringCharacterIterator textiter = new StringCharacterIterator(text); 753 Collator defaultcollator = Collator.getInstance(); 754 BreakIterator breaker = BreakIterator.getCharacterInstance(); 755 breaker.setText(text); 756 StringSearch search = new StringSearch(pattern, text); 757 if (!search.getPattern().equals(pattern) 758 || !search.getTarget().equals(textiter) 759 || !search.getCollator().equals(defaultcollator) 760 /*|| !search.getBreakIterator().equals(breaker)*/) { 761 errln("StringSearch(String, String) error"); 762 } 763 search = new StringSearch(pattern, textiter, m_fr_fr_); 764 if (!search.getPattern().equals(pattern) 765 || !search.getTarget().equals(textiter) 766 || !search.getCollator().equals(m_fr_fr_) 767 /*|| !search.getBreakIterator().equals(breaker)*/) { 768 errln("StringSearch(String, StringCharacterIterator, " 769 + "RuleBasedCollator) error"); 770 } 771 Locale de = new Locale("de", "DE"); 772 breaker = BreakIterator.getCharacterInstance(de); 773 breaker.setText(text); 774 search = new StringSearch(pattern, textiter, de); 775 if (!search.getPattern().equals(pattern) 776 || !search.getTarget().equals(textiter) 777 || !search.getCollator().equals(Collator.getInstance(de)) 778 /*|| !search.getBreakIterator().equals(breaker)*/) { 779 errln("StringSearch(String, StringCharacterIterator, Locale) " 780 + "error"); 781 } 782 783 search = new StringSearch(pattern, textiter, m_fr_fr_, 784 m_en_wordbreaker_); 785 if (!search.getPattern().equals(pattern) 786 || !search.getTarget().equals(textiter) 787 || !search.getCollator().equals(m_fr_fr_) 788 || !search.getBreakIterator().equals(m_en_wordbreaker_)) { 789 errln("StringSearch(String, StringCharacterIterator, Locale) " 790 + "error"); 791 } 792 } 793 794 @Test 795 public void TestBasic() { 796 for (int count = 0; count < BASIC.length; count++) { 797 if (!assertEqual(BASIC[count])) { 798 errln("Error at test number " + count); 799 } 800 } 801 } 802 803 @Test 804 public void TestBreakIterator() { 805 806 String text = BREAKITERATOREXACT[0].text; 807 String pattern = BREAKITERATOREXACT[0].pattern; 808 StringSearch strsrch = null; 809 try { 810 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 811 } catch (Exception e) { 812 errln("Error opening string search"); 813 return; 814 } 815 816 strsrch.setBreakIterator(null); 817 if (strsrch.getBreakIterator() != null) { 818 errln("Error usearch_getBreakIterator returned wrong object"); 819 } 820 821 strsrch.setBreakIterator(m_en_characterbreaker_); 822 if (!strsrch.getBreakIterator().equals(m_en_characterbreaker_)) { 823 errln("Error usearch_getBreakIterator returned wrong object"); 824 } 825 826 strsrch.setBreakIterator(m_en_wordbreaker_); 827 if (!strsrch.getBreakIterator().equals(m_en_wordbreaker_)) { 828 errln("Error usearch_getBreakIterator returned wrong object"); 829 } 830 831 int count = 0; 832 while (count < 4) { 833 // special purposes for tests numbers 0-3 834 SearchData search = BREAKITERATOREXACT[count]; 835 RuleBasedCollator collator = getCollator(search.collator); 836 BreakIterator breaker = getBreakIterator(search.breaker); 837 //StringSearch strsrch; 838 839 text = search.text; 840 pattern = search.pattern; 841 if (breaker != null) { 842 breaker.setText(text); 843 } 844 collator.setStrength(search.strength); 845 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker); 846 if (strsrch.getBreakIterator() != breaker) { 847 errln("Error setting break iterator"); 848 } 849 if (!assertEqualWithStringSearch(strsrch, search)) { 850 collator.setStrength(TERTIARY); 851 } 852 search = BREAKITERATOREXACT[count + 1]; 853 breaker = getBreakIterator(search.breaker); 854 if (breaker != null) { 855 breaker.setText(text); 856 } 857 strsrch.setBreakIterator(breaker); 858 if (strsrch.getBreakIterator() != breaker) { 859 errln("Error setting break iterator"); 860 } 861 strsrch.reset(); 862 if (!assertEqualWithStringSearch(strsrch, search)) { 863 errln("Error at test number " + count); 864 } 865 count += 2; 866 } 867 for (count = 0; count < BREAKITERATOREXACT.length; count++) { 868 if (!assertEqual(BREAKITERATOREXACT[count])) { 869 errln("Error at test number " + count); 870 } 871 } 872 } 873 874 @Test 875 public void TestBreakIteratorCanonical() { 876 int count = 0; 877 while (count < 4) { 878 // special purposes for tests numbers 0-3 879 SearchData search = BREAKITERATORCANONICAL[count]; 880 881 String text = search.text; 882 String pattern = search.pattern; 883 RuleBasedCollator collator = getCollator(search.collator); 884 collator.setStrength(search.strength); 885 886 BreakIterator breaker = getBreakIterator(search.breaker); 887 StringSearch strsrch = null; 888 try { 889 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker); 890 } catch (Exception e) { 891 errln("Error creating string search data"); 892 return; 893 } 894 strsrch.setCanonical(true); 895 if (!strsrch.getBreakIterator().equals(breaker)) { 896 errln("Error setting break iterator"); 897 return; 898 } 899 if (!assertEqualWithStringSearch(strsrch, search)) { 900 collator.setStrength(TERTIARY); 901 return; 902 } 903 search = BREAKITERATOREXACT[count + 1]; 904 breaker = getBreakIterator(search.breaker); 905 breaker.setText(strsrch.getTarget()); 906 strsrch.setBreakIterator(breaker); 907 if (!strsrch.getBreakIterator().equals(breaker)) { 908 errln("Error setting break iterator"); 909 return; 910 } 911 strsrch.reset(); 912 strsrch.setCanonical(true); 913 if (!assertEqualWithStringSearch(strsrch, search)) { 914 errln("Error at test number " + count); 915 return; 916 } 917 count += 2; 918 } 919 920 for (count = 0; count < BREAKITERATORCANONICAL.length; count++) { 921 if (!assertEqual(BREAKITERATORCANONICAL[count])) { 922 errln("Error at test number " + count); 923 return; 924 } 925 } 926 } 927 928 @Test 929 public void TestCanonical() { 930 for (int count = 0; count < BASICCANONICAL.length; count++) { 931 if (!assertCanonicalEqual(BASICCANONICAL[count])) { 932 errln("Error at test number " + count); 933 } 934 } 935 } 936 937 @Test 938 public void TestCollator() { 939 // test collator that thinks "o" and "p" are the same thing 940 String text = COLLATOR[0].text; 941 String pattern = COLLATOR[0].pattern; 942 StringSearch strsrch = null; 943 try { 944 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 945 } catch (Exception e) { 946 errln("Error opening string search "); 947 return; 948 } 949 if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) { 950 return; 951 } 952 String rules = TESTCOLLATORRULE; 953 RuleBasedCollator tailored = null; 954 try { 955 tailored = new RuleBasedCollator(rules); 956 tailored.setStrength(COLLATOR[1].strength); 957 } catch (Exception e) { 958 errln("Error opening rule based collator "); 959 return; 960 } 961 962 strsrch.setCollator(tailored); 963 if (!strsrch.getCollator().equals(tailored)) { 964 errln("Error setting rule based collator"); 965 } 966 strsrch.reset(); 967 if (!assertEqualWithStringSearch(strsrch, COLLATOR[1])) { 968 return; 969 } 970 strsrch.setCollator(m_en_us_); 971 strsrch.reset(); 972 if (!strsrch.getCollator().equals(m_en_us_)) { 973 errln("Error setting rule based collator"); 974 } 975 if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) { 976 errln("Error searching collator test"); 977 } 978 } 979 980 @Test 981 public void TestCollatorCanonical() { 982 /* test collator that thinks "o" and "p" are the same thing */ 983 String text = COLLATORCANONICAL[0].text; 984 String pattern = COLLATORCANONICAL[0].pattern; 985 986 StringSearch strsrch = null; 987 try { 988 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 989 strsrch.setCanonical(true); 990 } catch (Exception e) { 991 errln("Error opening string search "); 992 } 993 994 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) { 995 return; 996 } 997 998 String rules = TESTCOLLATORRULE; 999 RuleBasedCollator tailored = null; 1000 try { 1001 tailored = new RuleBasedCollator(rules); 1002 tailored.setStrength(COLLATORCANONICAL[1].strength); 1003 tailored.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1004 } catch (Exception e) { 1005 errln("Error opening rule based collator "); 1006 } 1007 1008 strsrch.setCollator(tailored); 1009 if (!strsrch.getCollator().equals(tailored)) { 1010 errln("Error setting rule based collator"); 1011 } 1012 strsrch.reset(); 1013 strsrch.setCanonical(true); 1014 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[1])) { 1015 logln("COLLATORCANONICAL[1] failed"); // Error should already be reported. 1016 } 1017 strsrch.setCollator(m_en_us_); 1018 strsrch.reset(); 1019 if (!strsrch.getCollator().equals(m_en_us_)) { 1020 errln("Error setting rule based collator"); 1021 } 1022 if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) { 1023 logln("COLLATORCANONICAL[0] failed"); // Error should already be reported. 1024 } 1025 } 1026 1027 @Test 1028 public void TestCompositeBoundaries() { 1029 for (int count = 0; count < COMPOSITEBOUNDARIES.length; count++) { 1030 // logln("composite " + count); 1031 if (!assertEqual(COMPOSITEBOUNDARIES[count])) { 1032 errln("Error at test number " + count); 1033 } 1034 } 1035 } 1036 1037 @Test 1038 public void TestCompositeBoundariesCanonical() { 1039 for (int count = 0; count < COMPOSITEBOUNDARIESCANONICAL.length; count++) { 1040 // logln("composite " + count); 1041 if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) { 1042 errln("Error at test number " + count); 1043 } 1044 } 1045 } 1046 1047 @Test 1048 public void TestContraction() { 1049 String rules = CONTRACTIONRULE; 1050 RuleBasedCollator collator = null; 1051 try { 1052 collator = new RuleBasedCollator(rules); 1053 collator.setStrength(TERTIARY); 1054 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1055 } catch (Exception e) { 1056 errln("Error opening collator "); 1057 } 1058 String text = "text"; 1059 String pattern = "pattern"; 1060 StringSearch strsrch = null; 1061 try { 1062 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); 1063 } catch (Exception e) { 1064 errln("Error opening string search "); 1065 } 1066 1067 for (int count = 0; count< CONTRACTION.length; count++) { 1068 text = CONTRACTION[count].text; 1069 pattern = CONTRACTION[count].pattern; 1070 strsrch.setTarget(new StringCharacterIterator(text)); 1071 strsrch.setPattern(pattern); 1072 if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) { 1073 errln("Error at test number " + count); 1074 } 1075 } 1076 } 1077 1078 @Test 1079 public void TestContractionCanonical() { 1080 String rules = CONTRACTIONRULE; 1081 RuleBasedCollator collator = null; 1082 try { 1083 collator = new RuleBasedCollator(rules); 1084 collator.setStrength(TERTIARY); 1085 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1086 } catch (Exception e) { 1087 errln("Error opening collator "); 1088 } 1089 String text = "text"; 1090 String pattern = "pattern"; 1091 StringSearch strsrch = null; 1092 try { 1093 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); 1094 strsrch.setCanonical(true); 1095 } catch (Exception e) { 1096 errln("Error opening string search"); 1097 } 1098 1099 for (int count = 0; count < CONTRACTIONCANONICAL.length; count++) { 1100 text = CONTRACTIONCANONICAL[count].text; 1101 pattern = CONTRACTIONCANONICAL[count].pattern; 1102 strsrch.setTarget(new StringCharacterIterator(text)); 1103 strsrch.setPattern(pattern); 1104 if (!assertEqualWithStringSearch(strsrch, CONTRACTIONCANONICAL[count])) { 1105 errln("Error at test number " + count); 1106 } 1107 } 1108 } 1109 1110 @Test 1111 public void TestGetMatch() { 1112 SearchData search = MATCH[0]; 1113 String text = search.text; 1114 String pattern = search.pattern; 1115 1116 StringSearch strsrch = null; 1117 try { 1118 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1119 } catch (Exception e) { 1120 errln("Error opening string search "); 1121 return; 1122 } 1123 1124 int count = 0; 1125 int matchindex = search.offset[count]; 1126 String matchtext; 1127 while (matchindex >= 0) { 1128 int matchlength = search.size[count]; 1129 strsrch.next(); 1130 if (matchindex != strsrch.getMatchStart() || 1131 matchlength != strsrch.getMatchLength()) { 1132 errln("Text: " + search.text); 1133 errln("Pattern: " + strsrch.getPattern()); 1134 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1135 return; 1136 } 1137 count++; 1138 1139 matchtext = strsrch.getMatchedText(); 1140 if (matchtext.length() != matchlength){ 1141 errln("Error getting match text"); 1142 } 1143 matchindex = search.offset[count]; 1144 } 1145 strsrch.next(); 1146 if (strsrch.getMatchStart() != StringSearch.DONE || 1147 strsrch.getMatchLength() != 0) { 1148 errln("Error end of match not found"); 1149 } 1150 matchtext = strsrch.getMatchedText(); 1151 if (matchtext != null) { 1152 errln("Error getting null matches"); 1153 } 1154 } 1155 1156 @Test 1157 public void TestGetSetAttribute() { 1158 String pattern = "pattern"; 1159 String text = "text"; 1160 StringSearch strsrch = null; 1161 try { 1162 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1163 } catch (Exception e) { 1164 errln("Error opening search"); 1165 return; 1166 } 1167 1168 if (strsrch.isOverlapping()) { 1169 errln("Error default overlaping should be false"); 1170 } 1171 strsrch.setOverlapping(true); 1172 if (!strsrch.isOverlapping()) { 1173 errln("Error setting overlap true"); 1174 } 1175 strsrch.setOverlapping(false); 1176 if (strsrch.isOverlapping()) { 1177 errln("Error setting overlap false"); 1178 } 1179 1180 strsrch.setCanonical(true); 1181 if (!strsrch.isCanonical()) { 1182 errln("Error setting canonical match true"); 1183 } 1184 strsrch.setCanonical(false); 1185 if (strsrch.isCanonical()) { 1186 errln("Error setting canonical match false"); 1187 } 1188 1189 if (strsrch.getElementComparisonType() != STANDARD_ELEMENT_COMPARISON) { 1190 errln("Error default element comparison type should be STANDARD_ELEMENT_COMPARISON"); 1191 } 1192 strsrch.setElementComparisonType(ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD); 1193 if (strsrch.getElementComparisonType() != ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD) { 1194 errln("Error setting element comparison type PATTERN_BASE_WEIGHT_IS_WILDCARD"); 1195 } 1196 } 1197 1198 @Test 1199 public void TestGetSetOffset() { 1200 String pattern = "1234567890123456"; 1201 String text = "12345678901234567890123456789012"; 1202 StringSearch strsrch = null; 1203 try { 1204 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1205 } catch (Exception e) { 1206 errln("Error opening search"); 1207 1208 return; 1209 } 1210 1211 /* testing out of bounds error */ 1212 try { 1213 strsrch.setIndex(-1); 1214 errln("Error expecting set offset error"); 1215 } catch (IndexOutOfBoundsException e) { 1216 logln("PASS: strsrch.setIndex(-1) failed as expected"); 1217 } 1218 1219 try { 1220 strsrch.setIndex(128); 1221 errln("Error expecting set offset error"); 1222 } catch (IndexOutOfBoundsException e) { 1223 logln("PASS: strsrch.setIndex(128) failed as expected"); 1224 } 1225 1226 for (int index = 0; index < BASIC.length; index++) { 1227 SearchData search = BASIC[index]; 1228 1229 text =search.text; 1230 pattern = search.pattern; 1231 strsrch.setTarget(new StringCharacterIterator(text)); 1232 strsrch.setPattern(pattern); 1233 strsrch.getCollator().setStrength(search.strength); 1234 strsrch.reset(); 1235 1236 int count = 0; 1237 int matchindex = search.offset[count]; 1238 1239 while (matchindex >= 0) { 1240 int matchlength = search.size[count]; 1241 strsrch.next(); 1242 if (matchindex != strsrch.getMatchStart() || 1243 matchlength != strsrch.getMatchLength()) { 1244 errln("Text: " + text); 1245 errln("Pattern: " + strsrch.getPattern()); 1246 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1247 return; 1248 } 1249 matchindex = search.offset[count + 1] == -1 ? -1 : 1250 search.offset[count + 2]; 1251 if (search.offset[count + 1] != -1) { 1252 strsrch.setIndex(search.offset[count + 1] + 1); 1253 if (strsrch.getIndex() != search.offset[count + 1] + 1) { 1254 errln("Error setting offset\n"); 1255 return; 1256 } 1257 } 1258 1259 count += 2; 1260 } 1261 strsrch.next(); 1262 if (strsrch.getMatchStart() != StringSearch.DONE) { 1263 errln("Text: " + text); 1264 errln("Pattern: " + strsrch.getPattern()); 1265 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1266 return; 1267 } 1268 } 1269 strsrch.getCollator().setStrength(TERTIARY); 1270 } 1271 1272 @Test 1273 public void TestGetSetOffsetCanonical() { 1274 1275 String text = "text"; 1276 String pattern = "pattern"; 1277 StringSearch strsrch = null; 1278 try { 1279 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1280 } catch (Exception e) { 1281 errln("Fail to open StringSearch!"); 1282 return; 1283 } 1284 strsrch.setCanonical(true); 1285 //TODO: setCanonical is not sufficient for canonical match. See #10725 1286 strsrch.getCollator().setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1287 /* testing out of bounds error */ 1288 try { 1289 strsrch.setIndex(-1); 1290 errln("Error expecting set offset error"); 1291 } catch (IndexOutOfBoundsException e) { 1292 logln("PASS: strsrch.setIndex(-1) failed as expected"); 1293 } 1294 try { 1295 strsrch.setIndex(128); 1296 errln("Error expecting set offset error"); 1297 } catch (IndexOutOfBoundsException e) { 1298 logln("PASS: strsrch.setIndex(128) failed as expected"); 1299 } 1300 1301 for (int index = 0; index < BASICCANONICAL.length; index++) { 1302 SearchData search = BASICCANONICAL[index]; 1303 text = search.text; 1304 pattern = search.pattern; 1305 strsrch.setTarget(new StringCharacterIterator(text)); 1306 strsrch.setPattern(pattern); 1307 int count = 0; 1308 int matchindex = search.offset[count]; 1309 while (matchindex >= 0) { 1310 int matchlength = search.size[count]; 1311 strsrch.next(); 1312 if (matchindex != strsrch.getMatchStart() || 1313 matchlength != strsrch.getMatchLength()) { 1314 errln("Text: " + text); 1315 errln("Pattern: " + strsrch.getPattern()); 1316 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1317 return; 1318 } 1319 matchindex = search.offset[count + 1] == -1 ? -1 : 1320 search.offset[count + 2]; 1321 if (search.offset[count + 1] != -1) { 1322 strsrch.setIndex(search.offset[count + 1] + 1); 1323 if (strsrch.getIndex() != search.offset[count + 1] + 1) { 1324 errln("Error setting offset"); 1325 return; 1326 } 1327 } 1328 1329 count += 2; 1330 } 1331 strsrch.next(); 1332 if (strsrch.getMatchStart() != StringSearch.DONE) { 1333 errln("Text: " + text); 1334 errln("Pattern: %s" + strsrch.getPattern()); 1335 errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); 1336 return; 1337 } 1338 } 1339 strsrch.getCollator().setStrength(TERTIARY); 1340 strsrch.getCollator().setDecomposition(Collator.NO_DECOMPOSITION); 1341 } 1342 1343 @Test 1344 public void TestIgnorable() { 1345 String rules = IGNORABLERULE; 1346 int count = 0; 1347 RuleBasedCollator collator = null; 1348 try { 1349 collator = new RuleBasedCollator(rules); 1350 collator.setStrength(IGNORABLE[count].strength); 1351 collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1352 } catch (Exception e) { 1353 errln("Error opening collator "); 1354 return; 1355 } 1356 String pattern = "pattern"; 1357 String text = "text"; 1358 StringSearch strsrch = null; 1359 try { 1360 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); 1361 } catch (Exception e) { 1362 errln("Error opening string search "); 1363 return; 1364 } 1365 1366 for (; count < IGNORABLE.length; count++) { 1367 text = IGNORABLE[count].text; 1368 pattern = IGNORABLE[count].pattern; 1369 strsrch.setTarget(new StringCharacterIterator(text)); 1370 strsrch.setPattern(pattern); 1371 if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) { 1372 errln("Error at test number " + count); 1373 } 1374 } 1375 } 1376 1377 @Test 1378 public void TestInitialization() { 1379 String pattern; 1380 String text; 1381 String temp = "a"; 1382 StringSearch result; 1383 1384 /* simple test on the pattern ce construction */ 1385 pattern = temp + temp; 1386 text = temp + temp + temp; 1387 try { 1388 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1389 } catch (Exception e) { 1390 errln("Error opening search "); 1391 return; 1392 } 1393 1394 /* testing if an extremely large pattern will fail the initialization */ 1395 pattern = ""; 1396 for (int count = 0; count < 512; count ++) { 1397 pattern += temp; 1398 } 1399 try { 1400 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1401 logln("pattern:" + result.getPattern()); 1402 } catch (Exception e) { 1403 errln("Fail: an extremely large pattern will fail the initialization"); 1404 return; 1405 } 1406 } 1407 1408 @Test 1409 public void TestNormCanonical() { 1410 m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1411 for (int count = 0; count < NORMCANONICAL.length; count++) { 1412 if (!assertCanonicalEqual(NORMCANONICAL[count])) { 1413 errln("Error at test number " + count); 1414 } 1415 } 1416 m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION); 1417 } 1418 1419 @Test 1420 public void TestNormExact() { 1421 int count; 1422 1423 m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 1424 for (count = 0; count < BASIC.length; count++) { 1425 if (!assertEqual(BASIC[count])) { 1426 errln("Error at test number " + count); 1427 } 1428 } 1429 for (count = 0; count < NORMEXACT.length; count++) { 1430 if (!assertEqual(NORMEXACT[count])) { 1431 errln("Error at test number " + count); 1432 } 1433 } 1434 m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION); 1435 for (count = 0; count < NONNORMEXACT.length; count++) { 1436 if (!assertEqual(NONNORMEXACT[count])) { 1437 errln("Error at test number " + count); 1438 } 1439 } 1440 } 1441 1442 @Test 1443 public void TestOpenClose() { 1444 StringSearch result; 1445 BreakIterator breakiter = m_en_wordbreaker_; 1446 String pattern = ""; 1447 String text = ""; 1448 String temp = "a"; 1449 StringCharacterIterator chariter= new StringCharacterIterator(text); 1450 1451 /* testing null arguments */ 1452 try { 1453 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); 1454 errln("Error: null arguments should produce an error"); 1455 } catch (Exception e) { 1456 logln("PASS: null arguments failed as expected"); 1457 } 1458 1459 chariter.setText(text); 1460 try { 1461 result = new StringSearch(pattern, chariter, null, null); 1462 errln("Error: null arguments should produce an error"); 1463 } catch (Exception e) { 1464 logln("PASS: null arguments failed as expected"); 1465 } 1466 1467 text = String.valueOf(0x1); 1468 try { 1469 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); 1470 errln("Error: Empty pattern should produce an error"); 1471 } catch (Exception e) { 1472 logln("PASS: Empty pattern failed as expected"); 1473 } 1474 1475 chariter.setText(text); 1476 try { 1477 result = new StringSearch(pattern, chariter, null, null); 1478 errln("Error: Empty pattern should produce an error"); 1479 } catch (Exception e) { 1480 logln("PASS: Empty pattern failed as expected"); 1481 } 1482 1483 text = ""; 1484 pattern =temp; 1485 try { 1486 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); 1487 errln("Error: Empty text should produce an error"); 1488 } catch (Exception e) { 1489 logln("PASS: Empty text failed as expected"); 1490 } 1491 1492 chariter.setText(text); 1493 try { 1494 result = new StringSearch(pattern, chariter, null, null); 1495 errln("Error: Empty text should produce an error"); 1496 } catch (Exception e) { 1497 logln("PASS: Empty text failed as expected"); 1498 } 1499 1500 text += temp; 1501 try { 1502 result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); 1503 errln("Error: null arguments should produce an error"); 1504 } catch (Exception e) { 1505 logln("PASS: null arguments failed as expected"); 1506 } 1507 1508 chariter.setText(text); 1509 try { 1510 result = new StringSearch(pattern, chariter, null, null); 1511 errln("Error: null arguments should produce an error"); 1512 } catch (Exception e) { 1513 logln("PASS: null arguments failed as expected"); 1514 } 1515 1516 try { 1517 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); 1518 } catch (Exception e) { 1519 errln("Error: null break iterator is valid for opening search"); 1520 } 1521 1522 try { 1523 result = new StringSearch(pattern, chariter, m_en_us_, null); 1524 } catch (Exception e) { 1525 errln("Error: null break iterator is valid for opening search"); 1526 } 1527 1528 try { 1529 result = new StringSearch(pattern, new StringCharacterIterator(text), Locale.ENGLISH); 1530 } catch (Exception e) { 1531 errln("Error: null break iterator is valid for opening search"); 1532 } 1533 1534 try { 1535 result = new StringSearch(pattern, chariter, Locale.ENGLISH); 1536 } catch (Exception e) { 1537 errln("Error: null break iterator is valid for opening search"); 1538 } 1539 1540 try { 1541 result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, breakiter); 1542 } catch (Exception e) { 1543 errln("Error: Break iterator is valid for opening search"); 1544 } 1545 1546 try { 1547 result = new StringSearch(pattern, chariter, m_en_us_, null); 1548 logln("pattern:" + result.getPattern()); 1549 } catch (Exception e) { 1550 errln("Error: Break iterator is valid for opening search"); 1551 } 1552 } 1553 1554 @Test 1555 public void TestOverlap() { 1556 int count; 1557 1558 for (count = 0; count < OVERLAP.length; count++) { 1559 if (!assertEqualWithAttribute(OVERLAP[count], false, true)) { 1560 errln("Error at overlap test number " + count); 1561 } 1562 } 1563 1564 for (count = 0; count < NONOVERLAP.length; count++) { 1565 if (!assertEqual(NONOVERLAP[count])) { 1566 errln("Error at non overlap test number " + count); 1567 } 1568 } 1569 1570 for (count = 0; count < OVERLAP.length && count < NONOVERLAP.length; count++) { 1571 SearchData search = (OVERLAP[count]); 1572 String text = search.text; 1573 String pattern = search.pattern; 1574 1575 RuleBasedCollator collator = getCollator(search.collator); 1576 StringSearch strsrch = null; 1577 try { 1578 strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); 1579 } catch (Exception e) { 1580 errln("error open StringSearch"); 1581 return; 1582 } 1583 1584 strsrch.setOverlapping(true); 1585 if (!strsrch.isOverlapping()) { 1586 errln("Error setting overlap option"); 1587 } 1588 if (!assertEqualWithStringSearch(strsrch, search)) { 1589 return; 1590 } 1591 1592 search = NONOVERLAP[count]; 1593 strsrch.setOverlapping(false); 1594 if (strsrch.isOverlapping()) { 1595 errln("Error setting overlap option"); 1596 } 1597 strsrch.reset(); 1598 if (!assertEqualWithStringSearch(strsrch, search)) { 1599 errln("Error at test number " + count); 1600 } 1601 } 1602 } 1603 1604 @Test 1605 public void TestOverlapCanonical() { 1606 int count; 1607 1608 for (count = 0; count < OVERLAPCANONICAL.length; count++) { 1609 if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true, true)) { 1610 errln("Error at overlap test number %d" + count); 1611 } 1612 } 1613 1614 for (count = 0; count < NONOVERLAP.length; count++) { 1615 if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) { 1616 errln("Error at non overlap test number %d" + count); 1617 } 1618 } 1619 1620 for (count = 0; count < OVERLAPCANONICAL.length && count < NONOVERLAPCANONICAL.length; count++) { 1621 SearchData search = OVERLAPCANONICAL[count]; 1622 RuleBasedCollator collator = getCollator(search.collator); 1623 StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), collator, null); 1624 strsrch.setCanonical(true); 1625 strsrch.setOverlapping(true); 1626 if (strsrch.isOverlapping() != true) { 1627 errln("Error setting overlap option"); 1628 } 1629 if (!assertEqualWithStringSearch(strsrch, search)) { 1630 strsrch = null; 1631 return; 1632 } 1633 search = NONOVERLAPCANONICAL[count]; 1634 strsrch.setOverlapping(false); 1635 if (strsrch.isOverlapping() != false) { 1636 errln("Error setting overlap option"); 1637 } 1638 strsrch.reset(); 1639 if (!assertEqualWithStringSearch(strsrch, search)) { 1640 strsrch = null; 1641 errln("Error at test number %d" + count); 1642 } 1643 } 1644 } 1645 1646 @Test 1647 public void TestPattern() { 1648 m_en_us_.setStrength(PATTERN[0].strength); 1649 StringSearch strsrch = new StringSearch(PATTERN[0].pattern, new StringCharacterIterator(PATTERN[0].text), m_en_us_, null); 1650 1651 if (strsrch.getPattern() != PATTERN[0].pattern) { 1652 errln("Error setting pattern"); 1653 } 1654 if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) { 1655 m_en_us_.setStrength(TERTIARY); 1656 if (strsrch != null) { 1657 strsrch = null; 1658 } 1659 return; 1660 } 1661 1662 strsrch.setPattern(PATTERN[1].pattern); 1663 if (PATTERN[1].pattern != strsrch.getPattern()) { 1664 errln("Error setting pattern"); 1665 m_en_us_.setStrength(TERTIARY); 1666 if (strsrch != null) { 1667 strsrch = null; 1668 } 1669 return; 1670 } 1671 strsrch.reset(); 1672 1673 if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) { 1674 m_en_us_.setStrength(TERTIARY); 1675 if (strsrch != null) { 1676 strsrch = null; 1677 } 1678 return; 1679 } 1680 1681 strsrch.setPattern(PATTERN[0].pattern); 1682 if (PATTERN[0].pattern != strsrch.getPattern()) { 1683 errln("Error setting pattern"); 1684 m_en_us_.setStrength(TERTIARY); 1685 if (strsrch != null) { 1686 strsrch = null; 1687 } 1688 return; 1689 } 1690 strsrch.reset(); 1691 1692 if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) { 1693 m_en_us_.setStrength(TERTIARY); 1694 if (strsrch != null) { 1695 strsrch = null; 1696 } 1697 return; 1698 } 1699 /* enormous pattern size to see if this crashes */ 1700 String pattern = ""; 1701 for (int templength = 0; templength != 512; templength ++) { 1702 pattern += 0x61; 1703 } 1704 try{ 1705 strsrch.setPattern(pattern); 1706 }catch(Exception e) { 1707 errln("Error setting pattern with size 512"); 1708 } 1709 1710 m_en_us_.setStrength(TERTIARY); 1711 if (strsrch != null) { 1712 strsrch = null; 1713 } 1714 } 1715 1716 @Test 1717 public void TestPatternCanonical() { 1718 //StringCharacterIterator text = new StringCharacterIterator(PATTERNCANONICAL[0].text); 1719 m_en_us_.setStrength(PATTERNCANONICAL[0].strength); 1720 StringSearch strsrch = new StringSearch(PATTERNCANONICAL[0].pattern, new StringCharacterIterator(PATTERNCANONICAL[0].text), 1721 m_en_us_, null); 1722 strsrch.setCanonical(true); 1723 1724 if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) { 1725 errln("Error setting pattern"); 1726 } 1727 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) { 1728 m_en_us_.setStrength(TERTIARY); 1729 strsrch = null; 1730 return; 1731 } 1732 1733 strsrch.setPattern(PATTERNCANONICAL[1].pattern); 1734 if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) { 1735 errln("Error setting pattern"); 1736 m_en_us_.setStrength(TERTIARY); 1737 strsrch = null; 1738 return; 1739 } 1740 strsrch.reset(); 1741 strsrch.setCanonical(true); 1742 1743 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) { 1744 m_en_us_.setStrength(TERTIARY); 1745 strsrch = null; 1746 return; 1747 } 1748 1749 strsrch.setPattern(PATTERNCANONICAL[0].pattern); 1750 if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) { 1751 errln("Error setting pattern"); 1752 m_en_us_.setStrength(TERTIARY); 1753 strsrch = null; 1754 return; 1755 } 1756 1757 strsrch.reset(); 1758 strsrch.setCanonical(true); 1759 if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) { 1760 m_en_us_.setStrength(TERTIARY); 1761 strsrch = null; 1762 return; 1763 } 1764 } 1765 1766 @Test 1767 public void TestReset() { 1768 StringCharacterIterator text = new StringCharacterIterator("fish fish"); 1769 String pattern = "s"; 1770 1771 StringSearch strsrch = new StringSearch(pattern, text, m_en_us_, null); 1772 strsrch.setOverlapping(true); 1773 strsrch.setCanonical(true); 1774 strsrch.setIndex(9); 1775 strsrch.reset(); 1776 if (strsrch.isCanonical() || strsrch.isOverlapping() || 1777 strsrch.getIndex() != 0 || strsrch.getMatchLength() != 0 || 1778 strsrch.getMatchStart() != SearchIterator.DONE) { 1779 errln("Error resetting string search"); 1780 } 1781 1782 strsrch.previous(); 1783 if (strsrch.getMatchStart() != 7 || strsrch.getMatchLength() != 1) { 1784 errln("Error resetting string search\n"); 1785 } 1786 } 1787 1788 @Test 1789 public void TestSetMatch() { 1790 for (int count = 0; count < MATCH.length; count++) { 1791 SearchData search = MATCH[count]; 1792 StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), 1793 m_en_us_, null); 1794 1795 int size = 0; 1796 while (search.offset[size] != -1) { 1797 size ++; 1798 } 1799 1800 if (strsrch.first() != search.offset[0]) { 1801 errln("Error getting first match"); 1802 } 1803 if (strsrch.last() != search.offset[size -1]) { 1804 errln("Error getting last match"); 1805 } 1806 1807 int index = 0; 1808 while (index < size) { 1809 if (index + 2 < size) { 1810 if (strsrch.following(search.offset[index + 2] - 1) != search.offset[index + 2]) { 1811 errln("Error getting following match at index " + (search.offset[index + 2]-1)); 1812 } 1813 } 1814 if (index + 1 < size) { 1815 if (strsrch.preceding(search.offset[index + 1] + search.size[index + 1] + 1) != search.offset[index + 1]) { 1816 errln("Error getting preceeding match at index " + (search.offset[index + 1] + 1)); 1817 } 1818 } 1819 index += 2; 1820 } 1821 1822 if (strsrch.following(search.text.length()) != SearchIterator.DONE) { 1823 errln("Error expecting out of bounds match"); 1824 } 1825 if (strsrch.preceding(0) != SearchIterator.DONE) { 1826 errln("Error expecting out of bounds match"); 1827 } 1828 } 1829 } 1830 1831 @Test 1832 public void TestStrength() { 1833 for (int count = 0; count < STRENGTH.length; count++) { 1834 if (!assertEqual(STRENGTH[count])) { 1835 errln("Error at test number " + count); 1836 } 1837 } 1838 } 1839 1840 @Test 1841 public void TestStrengthCanonical() { 1842 for (int count = 0; count < STRENGTHCANONICAL.length; count++) { 1843 if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) { 1844 errln("Error at test number" + count); 1845 } 1846 } 1847 } 1848 1849 @Test 1850 public void TestSupplementary() { 1851 for (int count = 0; count < SUPPLEMENTARY.length; count++) { 1852 if (!assertEqual(SUPPLEMENTARY[count])) { 1853 errln("Error at test number " + count); 1854 } 1855 } 1856 } 1857 1858 @Test 1859 public void TestSupplementaryCanonical() { 1860 for (int count = 0; count < SUPPLEMENTARYCANONICAL.length; count++) { 1861 if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) { 1862 errln("Error at test number" + count); 1863 } 1864 } 1865 } 1866 1867 @Test 1868 public void TestText() { 1869 SearchData TEXT[] = { 1870 SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)), 1871 SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)) 1872 }; 1873 StringCharacterIterator t = new StringCharacterIterator(TEXT[0].text); 1874 StringSearch strsrch = new StringSearch(TEXT[0].pattern, t, m_en_us_, null); 1875 1876 if (!t.equals(strsrch.getTarget())) { 1877 errln("Error setting text"); 1878 } 1879 if (!assertEqualWithStringSearch(strsrch, TEXT[0])) { 1880 errln("Error at assertEqualWithStringSearch"); 1881 return; 1882 } 1883 1884 t = new StringCharacterIterator(TEXT[1].text); 1885 strsrch.setTarget(t); 1886 if (!t.equals(strsrch.getTarget())) { 1887 errln("Error setting text"); 1888 return; 1889 } 1890 1891 if (!assertEqualWithStringSearch(strsrch, TEXT[1])) { 1892 errln("Error at assertEqualWithStringSearch"); 1893 return; 1894 } 1895 } 1896 1897 @Test 1898 public void TestTextCanonical() { 1899 StringCharacterIterator t = new StringCharacterIterator(TEXTCANONICAL[0].text); 1900 StringSearch strsrch = new StringSearch(TEXTCANONICAL[0].pattern, t, m_en_us_, null); 1901 strsrch.setCanonical(true); 1902 1903 if (!t.equals(strsrch.getTarget())) { 1904 errln("Error setting text"); 1905 } 1906 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) { 1907 strsrch = null; 1908 return; 1909 } 1910 1911 t = new StringCharacterIterator(TEXTCANONICAL[1].text); 1912 strsrch.setTarget(t); 1913 if (!t.equals(strsrch.getTarget())) { 1914 errln("Error setting text"); 1915 strsrch = null; 1916 return; 1917 } 1918 1919 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[1])) { 1920 strsrch = null; 1921 return; 1922 } 1923 1924 t = new StringCharacterIterator(TEXTCANONICAL[0].text); 1925 strsrch.setTarget(t); 1926 if (!t.equals(strsrch.getTarget())) { 1927 errln("Error setting text"); 1928 strsrch = null; 1929 return; 1930 } 1931 1932 if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) { 1933 errln("Error at assertEqualWithStringSearch"); 1934 strsrch = null; 1935 return; 1936 } 1937 } 1938 1939 @Test 1940 public void TestVariable() { 1941 m_en_us_.setAlternateHandlingShifted(true); 1942 for (int count = 0; count < VARIABLE.length; count++) { 1943 // logln("variable" + count); 1944 if (!assertEqual(VARIABLE[count])) { 1945 errln("Error at test number " + count); 1946 } 1947 } 1948 m_en_us_.setAlternateHandlingShifted(false); 1949 } 1950 1951 @Test 1952 public void TestVariableCanonical() { 1953 m_en_us_.setAlternateHandlingShifted(true); 1954 for (int count = 0; count < VARIABLE.length; count++) { 1955 // logln("variable " + count); 1956 if (!assertCanonicalEqual(VARIABLE[count])) { 1957 errln("Error at test number " + count); 1958 } 1959 } 1960 m_en_us_.setAlternateHandlingShifted(false); 1961 } 1962 1963 @Test 1964 public void TestSubClass() 1965 { 1966 class TestSearch extends SearchIterator 1967 { 1968 String pattern; 1969 String text; 1970 1971 TestSearch(StringCharacterIterator target, BreakIterator breaker, 1972 String pattern) 1973 { 1974 super(target, breaker); 1975 this.pattern = pattern; 1976 StringBuffer buffer = new StringBuffer(); 1977 while (targetText.getIndex() != targetText.getEndIndex()) { 1978 buffer.append(targetText.current()); 1979 targetText.next(); 1980 } 1981 text = buffer.toString(); 1982 targetText.setIndex(targetText.getBeginIndex()); 1983 } 1984 @Override 1985 protected int handleNext(int start) 1986 { 1987 int match = text.indexOf(pattern, start); 1988 if (match < 0) { 1989 targetText.last(); 1990 return DONE; 1991 } 1992 targetText.setIndex(match); 1993 setMatchLength(pattern.length()); 1994 return match; 1995 } 1996 @Override 1997 protected int handlePrevious(int start) 1998 { 1999 int match = text.lastIndexOf(pattern, start - 1); 2000 if (match < 0) { 2001 targetText.setIndex(0); 2002 return DONE; 2003 } 2004 targetText.setIndex(match); 2005 setMatchLength(pattern.length()); 2006 return match; 2007 } 2008 2009 @Override 2010 public int getIndex() 2011 { 2012 int result = targetText.getIndex(); 2013 if (result < 0 || result >= text.length()) { 2014 return DONE; 2015 } 2016 return result; 2017 } 2018 } 2019 2020 TestSearch search = new TestSearch( 2021 new StringCharacterIterator("abc abcd abc"), 2022 null, "abc"); 2023 int expected[] = {0, 4, 9}; 2024 for (int i = 0; i < expected.length; i ++) { 2025 if (search.next() != expected[i]) { 2026 errln("Error getting next match"); 2027 } 2028 if (search.getMatchLength() != search.pattern.length()) { 2029 errln("Error getting next match length"); 2030 } 2031 } 2032 if (search.next() != SearchIterator.DONE) { 2033 errln("Error should have reached the end of the iteration"); 2034 } 2035 for (int i = expected.length - 1; i >= 0; i --) { 2036 if (search.previous() != expected[i]) { 2037 errln("Error getting next match"); 2038 } 2039 if (search.getMatchLength() != search.pattern.length()) { 2040 errln("Error getting next match length"); 2041 } 2042 } 2043 if (search.previous() != SearchIterator.DONE) { 2044 errln("Error should have reached the start of the iteration"); 2045 } 2046 } 2047 2048 //Test for ticket 5024 2049 @Test 2050 public void TestDiactricMatch() { 2051 String pattern = "pattern"; 2052 String text = "text"; 2053 StringSearch strsrch = null; 2054 try { 2055 strsrch = new StringSearch(pattern, text); 2056 } catch (Exception e) { 2057 errln("Error opening string search "); 2058 return; 2059 } 2060 2061 for (int count = 0; count < DIACTRICMATCH.length; count++) { 2062 strsrch.setCollator(getCollator(DIACTRICMATCH[count].collator)); 2063 strsrch.getCollator().setStrength(DIACTRICMATCH[count].strength); 2064 strsrch.setBreakIterator(getBreakIterator(DIACTRICMATCH[count].breaker)); 2065 strsrch.reset(); 2066 text = DIACTRICMATCH[count].text; 2067 pattern = DIACTRICMATCH[count].pattern; 2068 strsrch.setTarget(new StringCharacterIterator(text)); 2069 strsrch.setPattern(pattern); 2070 if (!assertEqualWithStringSearch(strsrch, DIACTRICMATCH[count])) { 2071 errln("Error at test number " + count); 2072 } 2073 } 2074 } 2075 2076 @Test 2077 public void TestUsingSearchCollator() { 2078 String scKoText = 2079 " " + 2080 /*01*/ "\uAC00 " + // simple LV Hangul 2081 /*03*/ "\uAC01 " + // simple LVT Hangul 2082 /*05*/ "\uAC0F " + // LVTT, last jamo expands for search 2083 /*07*/ "\uAFFF " + // LLVVVTT, every jamo expands for search 2084 /*09*/ "\u1100\u1161\u11A8 " + // 0xAC01 as conjoining jamo 2085 /*13*/ "\u1100\u1161\u1100 " + // 0xAC01 as basic conjoining jamo (per search rules) 2086 /*17*/ "\u3131\u314F\u3131 " + // 0xAC01 as compatibility jamo 2087 /*21*/ "\u1100\u1161\u11B6 " + // 0xAC0F as conjoining jamo; last expands for search 2088 /*25*/ "\u1100\u1161\u1105\u1112 " + // 0xAC0F as basic conjoining jamo; last expands for search 2089 /*30*/ "\u1101\u1170\u11B6 " + // 0xAFFF as conjoining jamo; all expand for search 2090 /*34*/ "\u00E6 " + // small letter ae, expands 2091 /*36*/ "\u1E4D " + // small letter o with tilde and acute, decomposes 2092 ""; 2093 2094 String scKoPat0 = "\uAC01"; 2095 String scKoPat1 = "\u1100\u1161\u11A8"; // 0xAC01 as conjoining jamo 2096 String scKoPat2 = "\uAC0F"; 2097 String scKoPat3 = "\u1100\u1161\u1105\u1112"; // 0xAC0F as basic conjoining jamo 2098 String scKoPat4 = "\uAFFF"; 2099 String scKoPat5 = "\u1101\u1170\u11B6"; // 0xAFFF as conjoining jamo 2100 2101 int[] scKoSrchOff01 = { 3, 9, 13 }; 2102 int[] scKoSrchOff23 = { 5, 21, 25 }; 2103 int[] scKoSrchOff45 = { 7, 30 }; 2104 2105 int[] scKoStndOff01 = { 3, 9 }; 2106 int[] scKoStndOff2 = { 5, 21 }; 2107 int[] scKoStndOff3 = { 25 }; 2108 int[] scKoStndOff45 = { 7, 30 }; 2109 2110 class PatternAndOffsets { 2111 private String pattern; 2112 private int[] offsets; 2113 PatternAndOffsets(String pat, int[] offs) { 2114 pattern = pat; 2115 offsets = offs; 2116 } 2117 public String getPattern() { return pattern; } 2118 public int[] getOffsets() { return offsets; } 2119 } 2120 final PatternAndOffsets[] scKoSrchPatternsOffsets = { 2121 new PatternAndOffsets( scKoPat0, scKoSrchOff01 ), 2122 new PatternAndOffsets( scKoPat1, scKoSrchOff01 ), 2123 new PatternAndOffsets( scKoPat2, scKoSrchOff23 ), 2124 new PatternAndOffsets( scKoPat3, scKoSrchOff23 ), 2125 new PatternAndOffsets( scKoPat4, scKoSrchOff45 ), 2126 new PatternAndOffsets( scKoPat5, scKoSrchOff45 ), 2127 }; 2128 final PatternAndOffsets[] scKoStndPatternsOffsets = { 2129 new PatternAndOffsets( scKoPat0, scKoStndOff01 ), 2130 new PatternAndOffsets( scKoPat1, scKoStndOff01 ), 2131 new PatternAndOffsets( scKoPat2, scKoStndOff2 ), 2132 new PatternAndOffsets( scKoPat3, scKoStndOff3 ), 2133 new PatternAndOffsets( scKoPat4, scKoStndOff45 ), 2134 new PatternAndOffsets( scKoPat5, scKoStndOff45 ), 2135 }; 2136 2137 class TUSCItem { 2138 private String localeString; 2139 private String text; 2140 private PatternAndOffsets[] patternsAndOffsets; 2141 TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) { 2142 localeString = locStr; 2143 text = txt; 2144 patternsAndOffsets = patsAndOffs; 2145 } 2146 public String getLocaleString() { return localeString; } 2147 public String getText() { return text; } 2148 public PatternAndOffsets[] getPatternsAndOffsets() { return patternsAndOffsets; } 2149 } 2150 final TUSCItem[] tuscItems = { 2151 new TUSCItem( "root", scKoText, scKoStndPatternsOffsets ), 2152 new TUSCItem( "root@collation=search", scKoText, scKoSrchPatternsOffsets ), 2153 new TUSCItem( "ko@collation=search", scKoText, scKoSrchPatternsOffsets ), 2154 }; 2155 2156 String dummyPat = "a"; 2157 2158 for (TUSCItem tuscItem: tuscItems) { 2159 String localeString = tuscItem.getLocaleString(); 2160 ULocale uloc = new ULocale(localeString); 2161 RuleBasedCollator col = null; 2162 try { 2163 col = (RuleBasedCollator)Collator.getInstance(uloc); 2164 } catch (Exception e) { 2165 errln("Error: in locale " + localeString + ", err in Collator.getInstance"); 2166 continue; 2167 } 2168 StringCharacterIterator ci = new StringCharacterIterator(tuscItem.getText()); 2169 StringSearch srch = new StringSearch(dummyPat, ci, col); 2170 for ( PatternAndOffsets patternAndOffsets: tuscItem.getPatternsAndOffsets() ) { 2171 srch.setPattern(patternAndOffsets.getPattern()); 2172 int[] offsets = patternAndOffsets.getOffsets(); 2173 int ioff, noff = offsets.length; 2174 int offset; 2175 2176 srch.reset(); 2177 ioff = 0; 2178 while (true) { 2179 offset = srch.next(); 2180 if (offset == SearchIterator.DONE) { 2181 break; 2182 } 2183 if ( ioff < noff ) { 2184 if ( offset != offsets[ioff] ) { 2185 errln("Error: in locale " + localeString + ", expected SearchIterator.next() " + offsets[ioff] + ", got " + offset); 2186 //ioff = noff; 2187 //break; 2188 } 2189 ioff++; 2190 } else { 2191 errln("Error: in locale " + localeString + ", SearchIterator.next() returned more matches than expected"); 2192 } 2193 } 2194 if ( ioff < noff ) { 2195 errln("Error: in locale " + localeString + ", SearchIterator.next() returned fewer matches than expected"); 2196 } 2197 2198 srch.reset(); 2199 ioff = noff; 2200 while (true) { 2201 offset = srch.previous(); 2202 if (offset == SearchIterator.DONE) { 2203 break; 2204 } 2205 if ( ioff > 0 ) { 2206 ioff--; 2207 if ( offset != offsets[ioff] ) { 2208 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() " + offsets[ioff] + ", got " + offset); 2209 //ioff = 0; 2210 // break; 2211 } 2212 } else { 2213 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned more matches than expected"); 2214 } 2215 } 2216 if ( ioff > 0 ) { 2217 errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned fewer matches than expected"); 2218 } 2219 } 2220 } 2221 } 2222 2223 @Test 2224 public void TestIndicPrefixMatch() { 2225 for (int count = 0; count < INDICPREFIXMATCH.length; count++) { 2226 if (!assertEqual(INDICPREFIXMATCH[count])) { 2227 errln("Error at test number" + count); 2228 } 2229 } 2230 } 2231 2232 2233 // Test case for ticket#12555 2234 @Test 2235 public void TestLongPattern() { 2236 StringBuilder pattern = new StringBuilder(); 2237 for (int i = 0; i < 255; i++) { 2238 pattern.append('a'); 2239 } 2240 // appends a character producing multiple ce32 at 2241 // index 256. 2242 pattern.append('á'); 2243 2244 CharacterIterator target = new StringCharacterIterator("not important"); 2245 try { 2246 StringSearch ss = new StringSearch(pattern.toString(), target, Locale.ENGLISH); 2247 assertNotNull("Non-null StringSearch instance", ss); 2248 } catch (Exception e) { 2249 errln("Error initializing a new StringSearch object"); 2250 } 2251 } 2252} 2253