// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html#License /* ******************************************************************************* * Copyright (C) 2000-2015, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ /** * Port From: ICU4C v2.1 : collate/StringSearchTest * Source File: $ICU4CRoot/source/test/intltest/srchtest.cpp **/ package com.ibm.icu.dev.test.search; import static com.ibm.icu.text.Collator.IDENTICAL; import static com.ibm.icu.text.Collator.PRIMARY; import static com.ibm.icu.text.Collator.QUATERNARY; import static com.ibm.icu.text.Collator.SECONDARY; import static com.ibm.icu.text.Collator.TERTIARY; import static com.ibm.icu.text.SearchIterator.ElementComparisonType.ANY_BASE_WEIGHT_IS_WILDCARD; import static com.ibm.icu.text.SearchIterator.ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD; import static com.ibm.icu.text.SearchIterator.ElementComparisonType.STANDARD_ELEMENT_COMPARISON; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.Locale; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.text.BreakIterator; import com.ibm.icu.text.Collator; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.SearchIterator; import com.ibm.icu.text.SearchIterator.ElementComparisonType; import com.ibm.icu.text.StringSearch; import com.ibm.icu.util.ULocale; @RunWith(JUnit4.class) public class SearchTest extends TestFmwk { //inner class static class SearchData { SearchData(String text, String pattern, String coll, int strength, ElementComparisonType cmpType, String breaker, int[] offset, int[] size) { this.text = text; this.pattern = pattern; this.collator = coll; this.strength = strength; this.cmpType = 
cmpType; this.breaker = breaker; this.offset = offset; this.size = size; } String text; String pattern; String collator; int strength; ElementComparisonType cmpType; String breaker; int[] offset; int[] size; } RuleBasedCollator m_en_us_; RuleBasedCollator m_fr_fr_; RuleBasedCollator m_de_; RuleBasedCollator m_es_; BreakIterator m_en_wordbreaker_; BreakIterator m_en_characterbreaker_; // Just calling SearchData constructor, to make the test data source code // nice and short private static SearchData SD(String text, String pattern, String coll, int strength, ElementComparisonType cmpType, String breaker, int[] offset, int[] size) { return new SearchData(text, pattern, coll, strength, cmpType, breaker, offset, size); } // Just returning int[], to make the test data nice and short private static int[] IA(int... elements) { return elements; } static SearchData[] BASIC = { SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)), SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)), SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)), SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)), SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)), SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, 
null, IA(-1), IA(0)), SD("\u00c9", "e", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), }; /* Cases whose breaker field is "characterbreaker" or "wordbreaker"; getBreakIterator() maps that name to a BreakIterator instance. */ SearchData BREAKITERATOREXACT[] = { SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)), SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)), SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)), SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)), SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)), /* jitterbug 1745 */ SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 17, 30, -1), IA(1, 1, 1)), SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)), SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)), }; /* Lists the same cases as BREAKITERATOREXACT. NOTE(review): presumably run through assertCanonicalEqual by a test method outside this view - confirm. */ SearchData BREAKITERATORCANONICAL[] = { SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, 5, -1), IA(3, 3)), SD("foxy fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(5, -1), IA(3)), SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(10, 14, -1), IA(3, 2)), SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(10, -1), IA(3)), SD("Channel, another channel, more channels, and one last Channel", "Channel", "es", TERTIARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(0, 54, -1), IA(7, 7)), /* jitterbug 1745 */ SD("testing that \u00e9 does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 
17, 30, -1), IA(1, 1, 1)), SD("testing that string ab\u00e9cd does not match e", "e", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(1, 28, 41, -1), IA(1, 1, 1)), SD("\u00c9", "e", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "characterbreaker", IA(0, -1), IA(1)), }; /* Opens with the same eleven cases that open BASIC, then adds further combining-mark cases. */ SearchData BASICCANONICAL[] = { SD("xxxxxxxxxxxxxxxxxxxx", "fisher", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("silly spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, -1), IA(6)), SD("silly spring string string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(13, 20, -1), IA(6, 6)), SD("silly string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(6, 20, -1), IA(6, 6)), SD("string spring string", "string", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 14, -1), IA(6, 6)), SD("Scott Ganyo", "c", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), SD("Scott Ganyo", " ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(5, -1), IA(1)), SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300b", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325b", "\u0300b", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0325\u0300A\u0325\u0300", "\u0300A\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0325\u0300A\u0325\u0300", "\u0325A\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325b\u0300\u0325c \u0325b\u0300 \u0300b\u0325", "\u0300b\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u00c4\u0323", "A\u0323\u0308", null, TERTIARY, 
STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), SD("\u0308\u0323", "\u0323\u0308", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), }; /* Two cases over the same text and pattern: the TERTIARY case lists offset 0 only, the PRIMARY case lists offsets 0 and 4. */ SearchData COLLATOR[] = { /* english */ SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), /* tailored */ SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)), }; /* NOTE(review): presumably the tailoring behind the "tailored" COLLATOR case; its use is outside this view - confirm. */ String TESTCOLLATORRULE = "& o,O ; p,P"; /* Appended to the rules of the de and es collators in init(). */ String EXTRACOLLATIONRULE = " & ae ; \u00e4 & AE ; \u00c4 & oe ; \u00f6 & OE ; \u00d6 & ue ; \u00fc & UE ; \u00dc"; SearchData COLLATORCANONICAL[] = { /* english */ SD("fox fpx", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), /* tailored */ SD("fox fpx", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(3, 3)), }; SearchData COMPOSITEBOUNDARIES[] = { SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* first one matches only because it's at the start of the text */ SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), /* \\u0300 blocked by \\u0300 */ SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* A + 030A + 0301 */ SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", 
"A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* blocked accent */ SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)), }; /* Lists the same cases as COMPOSITEBOUNDARIES. */ SearchData COMPOSITEBOUNDARIESCANONICAL[] = { SD("\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("A\u00C0C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("\u00C0A", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(1)), SD("B\u00C0", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u00C0B", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, 
IA(-1), IA(0)), /* first one matches only because it's at the start of the text */ SD("\u0300\u00C0", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), /* \u0300 blocked by \u0300 */ SD("\u00C0\u0300", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* A + 030A + 0301 */ SD("\u01FA", "\u01FA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("\u01FA", "A\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("\u01FA", "\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "\u030AA", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* blocked accent */ SD("\u01FA", "A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA", "\u030A\u0301", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("A\u01FA", "A\u030A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FAA", "\u0301A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0F73", "\u0F73", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("\u0F73", "\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0F73", "\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0F73", "\u0F71\u0F72", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1)), SD("A\u0F73", "A\u0F71", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0F73A", "\u0F72A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u01FA A\u0301\u030A A\u030A\u0301 A\u030A \u01FA", "A\u030A", null, 
TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, -1), IA(2)), }; /* Cases whose text and patterns contain surrogate code units. */ SearchData SUPPLEMENTARY[] = { SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", "\uD800\uDC00", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)), SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)), SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), }; /* NOTE(review): presumably the tailoring used with the CONTRACTION tables; its use is outside this view - confirm. */ String CONTRACTIONRULE = "&z = ab/c < AB < X\u0300 < ABC < X\u0300\u0315"; SearchData CONTRACTION[] = { /* common discontiguous */ SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* contraction prefix */ SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* * discontiguous problem here for backwards iteration. 
accents not found because discontiguous stores all * information */ SD("X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* ends not with a contraction character */ SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* blocked discontiguous */ SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the * match fails because it ends in the middle of an expansion... */ SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), }; /* Same cases as CONTRACTION except that the backwards-iteration case is commented out and the final "ab"/"z" case carries IA(2) instead of IA(0) as its size list. */ SearchData CONTRACTIONCANONICAL[] = { /* common discontiguous */ SD("A\u0300\u0315", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("A\u0300\u0315", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* contraction prefix */ SD("AB\u0315C", "A", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("AB\u0315C", "AB", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("AB\u0315C", "\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* * discontiguous problem here for backwards iteration. 
forwards gives 0, 4 but backwards give 1, 3 */ /* * {"X\u0300\u0319\u0315", "\u0319", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, {0, -1), {4}), */ /* ends not with a contraction character */ SD("X\u0315\u0300D", "\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("X\u0315\u0300D", "X\u0300\u0315", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), SD("X\u0300\u031A\u0315D", "X\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* blocked discontiguous */ SD("X\u0300\u031A\u0315D", "\u031A\u0315D", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), /* * "ab" generates a contraction that's an expansion. The "z" matches the first CE of the expansion but the * match fails because it ends in the middle of an expansion... */ SD("ab", "z", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(2)), }; SearchData MATCH[] = { SD("a busy bee is a very busy beeee", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, -1), IA(3, 3)), /* 012345678901234567890123456789012345678901234567890 */ SD("a busy bee is a very busy beeee with no bee life", "bee", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(7, 26, 40, -1), IA(3, 3, 3)), }; /* NOTE(review): presumably the tailoring used with the IGNORABLE table; its use is outside this view - confirm. */ String IGNORABLERULE = "&a = \u0300"; SearchData IGNORABLE[] = { /* * This isn't much of a test when matches have to be on grapheme boundaries. The match at 0 only works because it's * at the start of the text. 
*/ SD("\u0300\u0315 \u0300\u0315 ", "\u0300", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(2)), }; SearchData DIACTRICMATCH[] = { SD("\u0061\u0061\u00E1", "\u0061\u00E1", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(2)), SD("\u0020\u00C2\u0303\u0020\u0041\u0061\u1EAA\u0041\u0302\u0303\u00C2\u0303\u1EAB\u0061\u0302\u0303\u00E2\u0303\uD806\uDC01\u0300\u0020", "\u00C2\u0303", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 4, 5, 6, 7, 10, 12, 13, 16, -1), IA(2, 1, 1, 1, 3, 2, 1, 3, 2)), SD("\u03BA\u03B1\u03B9\u0300\u0020\u03BA\u03B1\u1F76", "\u03BA\u03B1\u03B9", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 5, -1), IA(4, 3)), }; SearchData NORMCANONICAL[] = { SD("\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325", "\u0300\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325", "\u0325", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("a\u0300\u0325", "\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), }; SearchData NORMEXACT[] = { SD("a\u0300\u0325", "a\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(3)), }; SearchData NONNORMEXACT[] = { SD("a\u0300\u0325", "\u0325\u0300", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), }; /* Same text and pattern as NONOVERLAP; lists offsets 0, 2, 4 where NONOVERLAP lists 0, 4. */ SearchData OVERLAP[] = { SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)), }; SearchData NONOVERLAP[] = { SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)), }; /* Lists the same case as OVERLAP. */ SearchData OVERLAPCANONICAL[] = { SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 4, -1), IA(4, 4, 4)), }; 
/* Lists the same case as NONOVERLAP. */ SearchData NONOVERLAPCANONICAL[] = { SD("abababab", "abab", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 4, -1), IA(4, 4)), }; /* Lists the same two cases as PATTERN. */ SearchData PATTERNCANONICAL[] = { SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)), SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), }; SearchData PATTERN[] = { SD("The quick brown fox jumps over the lazy foxes", "the", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 31, -1), IA(3, 3)), SD("The quick brown fox jumps over the lazy foxes", "fox", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), }; /* Text shared by the PECHE_WITH_ACCENTS cases of the STRENGTH table. */ String PECHE_WITH_ACCENTS = "un p\u00E9ch\u00E9, " + "\u00E7a p\u00E8che par, " + "p\u00E9cher, " + "une p\u00EAche, " + "un p\u00EAcher, " + "j\u2019ai p\u00EAch\u00E9, " + "un p\u00E9cheur, " + "\u201Cp\u00E9che\u201D, " + "decomp peche\u0301, " + "base peche"; // in the above, the interesting words and their offsets are: // 3 pe<301>che<301> // 13 pe<300>che // 24 pe<301>cher // 36 pe<302>che // 46 pe<302>cher // 59 pe<302>che<301> // 69 pe<301>cheur // 79 pe<301>che // 94 peche<+301> // 107 peche SearchData STRENGTH[] = { /* 012345678901234567890123456789012345678901234567890123456789 */ SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)), SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)), SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)), SD("A 
channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)), SD("\u00c0 should match but not A", "A\u0300", "en", IDENTICAL, STANDARD_ELEMENT_COMPARISON, null, IA(0, -1), IA(1, 0)), /* some tests for modified element comparison, ticket #7093 */ SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)), SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(24, 69, 79, -1), IA(5, 5, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, 
-1), IA(5, 5, 6)), SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "pech\u00E9", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche\u0301", "en", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), /* more tests for modified element comparison (with fr), ticket #7093 */ SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(107, -1), IA(5)), SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, null, 
IA(24, 69, 79, -1), IA(5, 5, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(79, -1), IA(5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, -1), IA(5, 5, 5, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, -1), IA(5, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 24, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "p\u00E9che", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 79, 94, 107, -1), IA(5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "pech\u00E9", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 59, 94, -1), IA(5, 5, 6)), SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 59, 94, -1), IA(5, 5, 6)), SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, null, IA(3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 5, 5, 5, 6, 5)), SD(PECHE_WITH_ACCENTS, "peche\u0301", "fr", SECONDARY, ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", IA(3, 13, 36, 59, 79, 94, 107, -1), IA(5, 5, 5, 5, 5, 6, 5)), }; /* Repeats the first five cases of STRENGTH. */ SearchData STRENGTHCANONICAL[] = { /* 012345678901234567890123456789012345678901234567890123456789 */ SD("The 
quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, 40, -1), IA(3, 3)), SD("The quick brown fox jumps over the lazy foxes", "fox", "en", PRIMARY, STANDARD_ELEMENT_COMPARISON, "wordbreaker", IA(16, -1), IA(3)), SD("blackbirds Pat p\u00E9ch\u00E9 p\u00EAche p\u00E9cher p\u00EAcher Tod T\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", "peche", "fr", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 21, 27, 34, -1), IA(5, 5, 5, 5)), SD("This is a toe T\u00F6ne", "toe", "de", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(10, 14, -1), IA(3, 2)), SD("A channel, another CHANNEL, more Channels, and one last channel...", "channel", "es", PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(2, 19, 33, 56, -1), IA(7, 7, 7, 7)), }; /* Lists the same cases as SUPPLEMENTARY. */ SearchData SUPPLEMENTARYCANONICAL[] = { /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ SD("abc \uD800\uDC00 \uD800\uDC01 \uD801\uDC00 \uD800\uDC00abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00", "\uD800\uDC00", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 13, 22, 26, 29, -1), IA(2, 2, 2, 2, 2)), SD("and\uD834\uDDB9this sentence", "\uD834\uDDB9", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(2)), SD("and \uD834\uDDB9 this sentence", " \uD834\uDDB9 ", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), SD("and-\uD834\uDDB9-this sentence", "-\uD834\uDDB9-", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), SD("and,\uD834\uDDB9,this sentence", ",\uD834\uDDB9,", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), SD("and?\uD834\uDDB9?this sentence", "?\uD834\uDDB9?", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(3, -1), IA(4)), }; static SearchData VARIABLE[] = { /* 012345678901234567890123456789012345678901234567890123456789 */ SD("blackbirds black blackbirds blackbird 
black-bird", "blackbird", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 17, 28, 38, -1), IA(9, 9, 9, 10)), /* * to see that it doesn't go into an infinite loop if the start of text is an ignorable character */ SD(" on", "go", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), SD("abcdefghijklmnopqrstuvwxyz", " ", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1), IA(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), /* testing tightest match */ SD(" abc a bc ab c a bc ab c", "abc", null, QUATERNARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, -1), IA(3)), /* 012345678901234567890123456789012345678901234567890123456789 */ SD(" abc a bc ab c a bc ab c", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(1, 6, 13, 21, 31, -1), IA(3, 4, 4, 5, 5)), /* totally ignorable text */ SD(" ---------------", "abc", null, SECONDARY, STANDARD_ELEMENT_COMPARISON, null, IA(-1), IA(0)), }; static SearchData TEXTCANONICAL[] = { SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)), SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)), }; /* PRIMARY-strength cases over text in the U+09xx range. */ static SearchData INDICPREFIXMATCH[] = { SD("\u0915\u0020\u0915\u0901\u0020\u0915\u0902\u0020\u0915\u0903\u0020\u0915\u0940\u0020\u0915\u093F\u0020\u0915\u0943\u0020\u0915\u093C\u0020\u0958", "\u0915", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 2, 5, 8, 11, 14, 17, 20, 23,-1), IA(1, 2, 2, 2, 1, 1, 1, 2, 1)), SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947", "\u0915\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(0, 3, 7, 11, -1), IA(2, 2, 2, 2)), 
SD("\u0915\u0924\u0020\u0915\u0924\u0940\u0020\u0915\u0924\u093F\u0020\u0915\u0924\u0947\u0020\u0915\u0943\u0924\u0020\u0915\u0943\u0924\u0947", "\u0915\u0943\u0924", null, PRIMARY, STANDARD_ELEMENT_COMPARISON, null, IA(15, 19, -1), IA(3, 3)), }; /** * Constructor */ public SearchTest() { } @Before public void init() throws Exception { m_en_us_ = (RuleBasedCollator)Collator.getInstance(Locale.US); m_fr_fr_ = (RuleBasedCollator)Collator.getInstance(Locale.FRANCE); m_de_ = (RuleBasedCollator)Collator.getInstance(new Locale("de", "DE")); m_es_ = (RuleBasedCollator)Collator.getInstance(new Locale("es", "ES")); m_en_wordbreaker_ = BreakIterator.getWordInstance(); m_en_characterbreaker_ = BreakIterator.getCharacterInstance(); String rules = m_de_.getRules() + EXTRACOLLATIONRULE; m_de_ = new RuleBasedCollator(rules); rules = m_es_.getRules() + EXTRACOLLATIONRULE; m_es_ = new RuleBasedCollator(rules); } RuleBasedCollator getCollator(String collator) { if (collator == null) { return m_en_us_; } if (collator.equals("fr")) { return m_fr_fr_; } else if (collator.equals("de")) { return m_de_; } else if (collator.equals("es")) { return m_es_; } else { return m_en_us_; } } BreakIterator getBreakIterator(String breaker) { if (breaker == null) { return null; } if (breaker.equals("wordbreaker")) { return m_en_wordbreaker_; } else { return m_en_characterbreaker_; } } boolean assertCanonicalEqual(SearchData search) { Collator collator = getCollator(search.collator); BreakIterator breaker = getBreakIterator(search.breaker); StringSearch strsrch; String text = search.text; String pattern = search.pattern; if (breaker != null) { breaker.setText(text); } collator.setStrength(search.strength); collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); strsrch.setElementComparisonType(search.cmpType); strsrch.setCanonical(true); } catch (Exception e) { errln("Error opening 
string search" + e.getMessage()); return false; } if (!assertEqualWithStringSearch(strsrch, search)) { collator.setStrength(TERTIARY); collator.setDecomposition(Collator.NO_DECOMPOSITION); return false; } collator.setStrength(TERTIARY); collator.setDecomposition(Collator.NO_DECOMPOSITION); return true; } boolean assertEqual(SearchData search) { Collator collator = getCollator(search.collator); BreakIterator breaker = getBreakIterator(search.breaker); StringSearch strsrch; String text = search.text; String pattern = search.pattern; if (breaker != null) { breaker.setText(text); } collator.setStrength(search.strength); try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); strsrch.setElementComparisonType(search.cmpType); } catch (Exception e) { errln("Error opening string search " + e.getMessage()); return false; } if (!assertEqualWithStringSearch(strsrch, search)) { collator.setStrength(TERTIARY); return false; } collator.setStrength(TERTIARY); return true; } boolean assertEqualWithAttribute(SearchData search, boolean canonical, boolean overlap) { Collator collator = getCollator(search.collator); BreakIterator breaker = getBreakIterator(search.breaker); StringSearch strsrch; String text = search.text; String pattern = search.pattern; if (breaker != null) { breaker.setText(text); } collator.setStrength(search.strength); try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), (RuleBasedCollator)collator, breaker); strsrch.setCanonical(canonical); strsrch.setOverlapping(overlap); strsrch.setElementComparisonType(search.cmpType); } catch (Exception e) { errln("Error opening string search " + e.getMessage()); return false; } if (!assertEqualWithStringSearch(strsrch, search)) { collator.setStrength(TERTIARY); return false; } collator.setStrength(TERTIARY); return true; } boolean assertEqualWithStringSearch(StringSearch strsrch, SearchData search) { int count = 0; int matchindex = 
search.offset[count]; String matchtext; if (strsrch.getMatchStart() != SearchIterator.DONE || strsrch.getMatchLength() != 0) { errln("Error with the initialization of match start and length"); } // start of following matches while (matchindex >= 0) { int matchlength = search.size[count]; strsrch.next(); //int x = strsrch.getMatchStart(); if (matchindex != strsrch.getMatchStart() || matchlength != strsrch.getMatchLength()) { errln("Text: " + search.text); errln("Searching forward for pattern: " + strsrch.getPattern()); errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return false; } count ++; matchtext = strsrch.getMatchedText(); String targetText = search.text; if (matchlength > 0 && targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) { errln("Error getting following matched text"); } matchindex = search.offset[count]; } strsrch.next(); if (strsrch.getMatchStart() != SearchIterator.DONE || strsrch.getMatchLength() != 0) { errln("Text: " + search.text); errln("Searching forward for pattern: " + strsrch.getPattern()); errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return false; } // start of preceding matches count = count == 0 ? 
0 : count - 1; matchindex = search.offset[count]; while (matchindex >= 0) { int matchlength = search.size[count]; strsrch.previous(); if (matchindex != strsrch.getMatchStart() || matchlength != strsrch.getMatchLength()) { errln("Text: " + search.text); errln("Searching backward for pattern: " + strsrch.getPattern()); errln("Expected offset,len " + matchindex + ", " + matchlength + "; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return false; } matchtext = strsrch.getMatchedText(); String targetText = search.text; if (matchlength > 0 && targetText.substring(matchindex, matchindex + matchlength).compareTo(matchtext) != 0) { errln("Error getting following matched text"); } matchindex = count > 0 ? search.offset[count - 1] : -1; count --; } strsrch.previous(); if (strsrch.getMatchStart() != SearchIterator.DONE || strsrch.getMatchLength() != 0) { errln("Text: " + search.text); errln("Searching backward for pattern: " + strsrch.getPattern()); errln("Expected DONE offset,len -1, 0; got " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return false; } return true; } @Test public void TestConstructor() { String pattern = "pattern"; String text = "text"; StringCharacterIterator textiter = new StringCharacterIterator(text); Collator defaultcollator = Collator.getInstance(); BreakIterator breaker = BreakIterator.getCharacterInstance(); breaker.setText(text); StringSearch search = new StringSearch(pattern, text); if (!search.getPattern().equals(pattern) || !search.getTarget().equals(textiter) || !search.getCollator().equals(defaultcollator) /*|| !search.getBreakIterator().equals(breaker)*/) { errln("StringSearch(String, String) error"); } search = new StringSearch(pattern, textiter, m_fr_fr_); if (!search.getPattern().equals(pattern) || !search.getTarget().equals(textiter) || !search.getCollator().equals(m_fr_fr_) /*|| !search.getBreakIterator().equals(breaker)*/) { errln("StringSearch(String, StringCharacterIterator, " + "RuleBasedCollator) 
error"); } Locale de = new Locale("de", "DE"); breaker = BreakIterator.getCharacterInstance(de); breaker.setText(text); search = new StringSearch(pattern, textiter, de); if (!search.getPattern().equals(pattern) || !search.getTarget().equals(textiter) || !search.getCollator().equals(Collator.getInstance(de)) /*|| !search.getBreakIterator().equals(breaker)*/) { errln("StringSearch(String, StringCharacterIterator, Locale) " + "error"); } search = new StringSearch(pattern, textiter, m_fr_fr_, m_en_wordbreaker_); if (!search.getPattern().equals(pattern) || !search.getTarget().equals(textiter) || !search.getCollator().equals(m_fr_fr_) || !search.getBreakIterator().equals(m_en_wordbreaker_)) { errln("StringSearch(String, StringCharacterIterator, Locale) " + "error"); } } @Test public void TestBasic() { for (int count = 0; count < BASIC.length; count++) { if (!assertEqual(BASIC[count])) { errln("Error at test number " + count); } } } @Test public void TestBreakIterator() { String text = BREAKITERATOREXACT[0].text; String pattern = BREAKITERATOREXACT[0].pattern; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); } catch (Exception e) { errln("Error opening string search"); return; } strsrch.setBreakIterator(null); if (strsrch.getBreakIterator() != null) { errln("Error usearch_getBreakIterator returned wrong object"); } strsrch.setBreakIterator(m_en_characterbreaker_); if (!strsrch.getBreakIterator().equals(m_en_characterbreaker_)) { errln("Error usearch_getBreakIterator returned wrong object"); } strsrch.setBreakIterator(m_en_wordbreaker_); if (!strsrch.getBreakIterator().equals(m_en_wordbreaker_)) { errln("Error usearch_getBreakIterator returned wrong object"); } int count = 0; while (count < 4) { // special purposes for tests numbers 0-3 SearchData search = BREAKITERATOREXACT[count]; RuleBasedCollator collator = getCollator(search.collator); BreakIterator breaker = getBreakIterator(search.breaker); 
//StringSearch strsrch; text = search.text; pattern = search.pattern; if (breaker != null) { breaker.setText(text); } collator.setStrength(search.strength); strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker); if (strsrch.getBreakIterator() != breaker) { errln("Error setting break iterator"); } if (!assertEqualWithStringSearch(strsrch, search)) { collator.setStrength(TERTIARY); } search = BREAKITERATOREXACT[count + 1]; breaker = getBreakIterator(search.breaker); if (breaker != null) { breaker.setText(text); } strsrch.setBreakIterator(breaker); if (strsrch.getBreakIterator() != breaker) { errln("Error setting break iterator"); } strsrch.reset(); if (!assertEqualWithStringSearch(strsrch, search)) { errln("Error at test number " + count); } count += 2; } for (count = 0; count < BREAKITERATOREXACT.length; count++) { if (!assertEqual(BREAKITERATOREXACT[count])) { errln("Error at test number " + count); } } } @Test public void TestBreakIteratorCanonical() { int count = 0; while (count < 4) { // special purposes for tests numbers 0-3 SearchData search = BREAKITERATORCANONICAL[count]; String text = search.text; String pattern = search.pattern; RuleBasedCollator collator = getCollator(search.collator); collator.setStrength(search.strength); BreakIterator breaker = getBreakIterator(search.breaker); StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, breaker); } catch (Exception e) { errln("Error creating string search data"); return; } strsrch.setCanonical(true); if (!strsrch.getBreakIterator().equals(breaker)) { errln("Error setting break iterator"); return; } if (!assertEqualWithStringSearch(strsrch, search)) { collator.setStrength(TERTIARY); return; } search = BREAKITERATOREXACT[count + 1]; breaker = getBreakIterator(search.breaker); breaker.setText(strsrch.getTarget()); strsrch.setBreakIterator(breaker); if (!strsrch.getBreakIterator().equals(breaker)) { errln("Error 
setting break iterator"); return; } strsrch.reset(); strsrch.setCanonical(true); if (!assertEqualWithStringSearch(strsrch, search)) { errln("Error at test number " + count); return; } count += 2; } for (count = 0; count < BREAKITERATORCANONICAL.length; count++) { if (!assertEqual(BREAKITERATORCANONICAL[count])) { errln("Error at test number " + count); return; } } } @Test public void TestCanonical() { for (int count = 0; count < BASICCANONICAL.length; count++) { if (!assertCanonicalEqual(BASICCANONICAL[count])) { errln("Error at test number " + count); } } } @Test public void TestCollator() { // test collator that thinks "o" and "p" are the same thing String text = COLLATOR[0].text; String pattern = COLLATOR[0].pattern; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); } catch (Exception e) { errln("Error opening string search "); return; } if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) { return; } String rules = TESTCOLLATORRULE; RuleBasedCollator tailored = null; try { tailored = new RuleBasedCollator(rules); tailored.setStrength(COLLATOR[1].strength); } catch (Exception e) { errln("Error opening rule based collator "); return; } strsrch.setCollator(tailored); if (!strsrch.getCollator().equals(tailored)) { errln("Error setting rule based collator"); } strsrch.reset(); if (!assertEqualWithStringSearch(strsrch, COLLATOR[1])) { return; } strsrch.setCollator(m_en_us_); strsrch.reset(); if (!strsrch.getCollator().equals(m_en_us_)) { errln("Error setting rule based collator"); } if (!assertEqualWithStringSearch(strsrch, COLLATOR[0])) { errln("Error searching collator test"); } } @Test public void TestCollatorCanonical() { /* test collator that thinks "o" and "p" are the same thing */ String text = COLLATORCANONICAL[0].text; String pattern = COLLATORCANONICAL[0].pattern; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, 
null); strsrch.setCanonical(true); } catch (Exception e) { errln("Error opening string search "); } if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) { return; } String rules = TESTCOLLATORRULE; RuleBasedCollator tailored = null; try { tailored = new RuleBasedCollator(rules); tailored.setStrength(COLLATORCANONICAL[1].strength); tailored.setDecomposition(Collator.CANONICAL_DECOMPOSITION); } catch (Exception e) { errln("Error opening rule based collator "); } strsrch.setCollator(tailored); if (!strsrch.getCollator().equals(tailored)) { errln("Error setting rule based collator"); } strsrch.reset(); strsrch.setCanonical(true); if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[1])) { logln("COLLATORCANONICAL[1] failed"); // Error should already be reported. } strsrch.setCollator(m_en_us_); strsrch.reset(); if (!strsrch.getCollator().equals(m_en_us_)) { errln("Error setting rule based collator"); } if (!assertEqualWithStringSearch(strsrch, COLLATORCANONICAL[0])) { logln("COLLATORCANONICAL[0] failed"); // Error should already be reported. 
} } @Test public void TestCompositeBoundaries() { for (int count = 0; count < COMPOSITEBOUNDARIES.length; count++) { // logln("composite " + count); if (!assertEqual(COMPOSITEBOUNDARIES[count])) { errln("Error at test number " + count); } } } @Test public void TestCompositeBoundariesCanonical() { for (int count = 0; count < COMPOSITEBOUNDARIESCANONICAL.length; count++) { // logln("composite " + count); if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) { errln("Error at test number " + count); } } } @Test public void TestContraction() { String rules = CONTRACTIONRULE; RuleBasedCollator collator = null; try { collator = new RuleBasedCollator(rules); collator.setStrength(TERTIARY); collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); } catch (Exception e) { errln("Error opening collator "); } String text = "text"; String pattern = "pattern"; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); } catch (Exception e) { errln("Error opening string search "); } for (int count = 0; count< CONTRACTION.length; count++) { text = CONTRACTION[count].text; pattern = CONTRACTION[count].pattern; strsrch.setTarget(new StringCharacterIterator(text)); strsrch.setPattern(pattern); if (!assertEqualWithStringSearch(strsrch, CONTRACTION[count])) { errln("Error at test number " + count); } } } @Test public void TestContractionCanonical() { String rules = CONTRACTIONRULE; RuleBasedCollator collator = null; try { collator = new RuleBasedCollator(rules); collator.setStrength(TERTIARY); collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); } catch (Exception e) { errln("Error opening collator "); } String text = "text"; String pattern = "pattern"; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); strsrch.setCanonical(true); } catch (Exception e) { errln("Error opening string search"); } for (int count = 0; count < 
CONTRACTIONCANONICAL.length; count++) { text = CONTRACTIONCANONICAL[count].text; pattern = CONTRACTIONCANONICAL[count].pattern; strsrch.setTarget(new StringCharacterIterator(text)); strsrch.setPattern(pattern); if (!assertEqualWithStringSearch(strsrch, CONTRACTIONCANONICAL[count])) { errln("Error at test number " + count); } } } @Test public void TestGetMatch() { SearchData search = MATCH[0]; String text = search.text; String pattern = search.pattern; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); } catch (Exception e) { errln("Error opening string search "); return; } int count = 0; int matchindex = search.offset[count]; String matchtext; while (matchindex >= 0) { int matchlength = search.size[count]; strsrch.next(); if (matchindex != strsrch.getMatchStart() || matchlength != strsrch.getMatchLength()) { errln("Text: " + search.text); errln("Pattern: " + strsrch.getPattern()); errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return; } count++; matchtext = strsrch.getMatchedText(); if (matchtext.length() != matchlength){ errln("Error getting match text"); } matchindex = search.offset[count]; } strsrch.next(); if (strsrch.getMatchStart() != StringSearch.DONE || strsrch.getMatchLength() != 0) { errln("Error end of match not found"); } matchtext = strsrch.getMatchedText(); if (matchtext != null) { errln("Error getting null matches"); } } @Test public void TestGetSetAttribute() { String pattern = "pattern"; String text = "text"; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); } catch (Exception e) { errln("Error opening search"); return; } if (strsrch.isOverlapping()) { errln("Error default overlaping should be false"); } strsrch.setOverlapping(true); if (!strsrch.isOverlapping()) { errln("Error setting overlap true"); } strsrch.setOverlapping(false); if (strsrch.isOverlapping()) 
{ errln("Error setting overlap false"); } strsrch.setCanonical(true); if (!strsrch.isCanonical()) { errln("Error setting canonical match true"); } strsrch.setCanonical(false); if (strsrch.isCanonical()) { errln("Error setting canonical match false"); } if (strsrch.getElementComparisonType() != STANDARD_ELEMENT_COMPARISON) { errln("Error default element comparison type should be STANDARD_ELEMENT_COMPARISON"); } strsrch.setElementComparisonType(ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD); if (strsrch.getElementComparisonType() != ElementComparisonType.PATTERN_BASE_WEIGHT_IS_WILDCARD) { errln("Error setting element comparison type PATTERN_BASE_WEIGHT_IS_WILDCARD"); } } @Test public void TestGetSetOffset() { String pattern = "1234567890123456"; String text = "12345678901234567890123456789012"; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); } catch (Exception e) { errln("Error opening search"); return; } /* testing out of bounds error */ try { strsrch.setIndex(-1); errln("Error expecting set offset error"); } catch (IndexOutOfBoundsException e) { logln("PASS: strsrch.setIndex(-1) failed as expected"); } try { strsrch.setIndex(128); errln("Error expecting set offset error"); } catch (IndexOutOfBoundsException e) { logln("PASS: strsrch.setIndex(128) failed as expected"); } for (int index = 0; index < BASIC.length; index++) { SearchData search = BASIC[index]; text =search.text; pattern = search.pattern; strsrch.setTarget(new StringCharacterIterator(text)); strsrch.setPattern(pattern); strsrch.getCollator().setStrength(search.strength); strsrch.reset(); int count = 0; int matchindex = search.offset[count]; while (matchindex >= 0) { int matchlength = search.size[count]; strsrch.next(); if (matchindex != strsrch.getMatchStart() || matchlength != strsrch.getMatchLength()) { errln("Text: " + text); errln("Pattern: " + strsrch.getPattern()); errln("Error match found at " + 
strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return; } matchindex = search.offset[count + 1] == -1 ? -1 : search.offset[count + 2]; if (search.offset[count + 1] != -1) { strsrch.setIndex(search.offset[count + 1] + 1); if (strsrch.getIndex() != search.offset[count + 1] + 1) { errln("Error setting offset\n"); return; } } count += 2; } strsrch.next(); if (strsrch.getMatchStart() != StringSearch.DONE) { errln("Text: " + text); errln("Pattern: " + strsrch.getPattern()); errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return; } } strsrch.getCollator().setStrength(TERTIARY); } @Test public void TestGetSetOffsetCanonical() { String text = "text"; String pattern = "pattern"; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); } catch (Exception e) { errln("Fail to open StringSearch!"); return; } strsrch.setCanonical(true); //TODO: setCanonical is not sufficient for canonical match. 
See #10725 strsrch.getCollator().setDecomposition(Collator.CANONICAL_DECOMPOSITION); /* testing out of bounds error */ try { strsrch.setIndex(-1); errln("Error expecting set offset error"); } catch (IndexOutOfBoundsException e) { logln("PASS: strsrch.setIndex(-1) failed as expected"); } try { strsrch.setIndex(128); errln("Error expecting set offset error"); } catch (IndexOutOfBoundsException e) { logln("PASS: strsrch.setIndex(128) failed as expected"); } for (int index = 0; index < BASICCANONICAL.length; index++) { SearchData search = BASICCANONICAL[index]; text = search.text; pattern = search.pattern; strsrch.setTarget(new StringCharacterIterator(text)); strsrch.setPattern(pattern); int count = 0; int matchindex = search.offset[count]; while (matchindex >= 0) { int matchlength = search.size[count]; strsrch.next(); if (matchindex != strsrch.getMatchStart() || matchlength != strsrch.getMatchLength()) { errln("Text: " + text); errln("Pattern: " + strsrch.getPattern()); errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return; } matchindex = search.offset[count + 1] == -1 ? 
-1 : search.offset[count + 2]; if (search.offset[count + 1] != -1) { strsrch.setIndex(search.offset[count + 1] + 1); if (strsrch.getIndex() != search.offset[count + 1] + 1) { errln("Error setting offset"); return; } } count += 2; } strsrch.next(); if (strsrch.getMatchStart() != StringSearch.DONE) { errln("Text: " + text); errln("Pattern: %s" + strsrch.getPattern()); errln("Error match found at " + strsrch.getMatchStart() + ", " + strsrch.getMatchLength()); return; } } strsrch.getCollator().setStrength(TERTIARY); strsrch.getCollator().setDecomposition(Collator.NO_DECOMPOSITION); } @Test public void TestIgnorable() { String rules = IGNORABLERULE; int count = 0; RuleBasedCollator collator = null; try { collator = new RuleBasedCollator(rules); collator.setStrength(IGNORABLE[count].strength); collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); } catch (Exception e) { errln("Error opening collator "); return; } String pattern = "pattern"; String text = "text"; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); } catch (Exception e) { errln("Error opening string search "); return; } for (; count < IGNORABLE.length; count++) { text = IGNORABLE[count].text; pattern = IGNORABLE[count].pattern; strsrch.setTarget(new StringCharacterIterator(text)); strsrch.setPattern(pattern); if (!assertEqualWithStringSearch(strsrch, IGNORABLE[count])) { errln("Error at test number " + count); } } } @Test public void TestInitialization() { String pattern; String text; String temp = "a"; StringSearch result; /* simple test on the pattern ce construction */ pattern = temp + temp; text = temp + temp + temp; try { result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); } catch (Exception e) { errln("Error opening search "); return; } /* testing if an extremely large pattern will fail the initialization */ pattern = ""; for (int count = 0; count < 512; count ++) { pattern += temp; } try 
{ result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); logln("pattern:" + result.getPattern()); } catch (Exception e) { errln("Fail: an extremely large pattern will fail the initialization"); return; } } @Test public void TestNormCanonical() { m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION); for (int count = 0; count < NORMCANONICAL.length; count++) { if (!assertCanonicalEqual(NORMCANONICAL[count])) { errln("Error at test number " + count); } } m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION); } @Test public void TestNormExact() { int count; m_en_us_.setDecomposition(Collator.CANONICAL_DECOMPOSITION); for (count = 0; count < BASIC.length; count++) { if (!assertEqual(BASIC[count])) { errln("Error at test number " + count); } } for (count = 0; count < NORMEXACT.length; count++) { if (!assertEqual(NORMEXACT[count])) { errln("Error at test number " + count); } } m_en_us_.setDecomposition(Collator.NO_DECOMPOSITION); for (count = 0; count < NONNORMEXACT.length; count++) { if (!assertEqual(NONNORMEXACT[count])) { errln("Error at test number " + count); } } } @Test public void TestOpenClose() { StringSearch result; BreakIterator breakiter = m_en_wordbreaker_; String pattern = ""; String text = ""; String temp = "a"; StringCharacterIterator chariter= new StringCharacterIterator(text); /* testing null arguments */ try { result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); errln("Error: null arguments should produce an error"); } catch (Exception e) { logln("PASS: null arguments failed as expected"); } chariter.setText(text); try { result = new StringSearch(pattern, chariter, null, null); errln("Error: null arguments should produce an error"); } catch (Exception e) { logln("PASS: null arguments failed as expected"); } text = String.valueOf(0x1); try { result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); errln("Error: Empty pattern should produce an error"); } catch 
(Exception e) { logln("PASS: Empty pattern failed as expected"); } chariter.setText(text); try { result = new StringSearch(pattern, chariter, null, null); errln("Error: Empty pattern should produce an error"); } catch (Exception e) { logln("PASS: Empty pattern failed as expected"); } text = ""; pattern =temp; try { result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); errln("Error: Empty text should produce an error"); } catch (Exception e) { logln("PASS: Empty text failed as expected"); } chariter.setText(text); try { result = new StringSearch(pattern, chariter, null, null); errln("Error: Empty text should produce an error"); } catch (Exception e) { logln("PASS: Empty text failed as expected"); } text += temp; try { result = new StringSearch(pattern, new StringCharacterIterator(text), null, null); errln("Error: null arguments should produce an error"); } catch (Exception e) { logln("PASS: null arguments failed as expected"); } chariter.setText(text); try { result = new StringSearch(pattern, chariter, null, null); errln("Error: null arguments should produce an error"); } catch (Exception e) { logln("PASS: null arguments failed as expected"); } try { result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, null); } catch (Exception e) { errln("Error: null break iterator is valid for opening search"); } try { result = new StringSearch(pattern, chariter, m_en_us_, null); } catch (Exception e) { errln("Error: null break iterator is valid for opening search"); } try { result = new StringSearch(pattern, new StringCharacterIterator(text), Locale.ENGLISH); } catch (Exception e) { errln("Error: null break iterator is valid for opening search"); } try { result = new StringSearch(pattern, chariter, Locale.ENGLISH); } catch (Exception e) { errln("Error: null break iterator is valid for opening search"); } try { result = new StringSearch(pattern, new StringCharacterIterator(text), m_en_us_, breakiter); } catch (Exception e) { 
errln("Error: Break iterator is valid for opening search"); } try { result = new StringSearch(pattern, chariter, m_en_us_, null); logln("pattern:" + result.getPattern()); } catch (Exception e) { errln("Error: Break iterator is valid for opening search"); } } @Test public void TestOverlap() { int count; for (count = 0; count < OVERLAP.length; count++) { if (!assertEqualWithAttribute(OVERLAP[count], false, true)) { errln("Error at overlap test number " + count); } } for (count = 0; count < NONOVERLAP.length; count++) { if (!assertEqual(NONOVERLAP[count])) { errln("Error at non overlap test number " + count); } } for (count = 0; count < OVERLAP.length && count < NONOVERLAP.length; count++) { SearchData search = (OVERLAP[count]); String text = search.text; String pattern = search.pattern; RuleBasedCollator collator = getCollator(search.collator); StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, new StringCharacterIterator(text), collator, null); } catch (Exception e) { errln("error open StringSearch"); return; } strsrch.setOverlapping(true); if (!strsrch.isOverlapping()) { errln("Error setting overlap option"); } if (!assertEqualWithStringSearch(strsrch, search)) { return; } search = NONOVERLAP[count]; strsrch.setOverlapping(false); if (strsrch.isOverlapping()) { errln("Error setting overlap option"); } strsrch.reset(); if (!assertEqualWithStringSearch(strsrch, search)) { errln("Error at test number " + count); } } } @Test public void TestOverlapCanonical() { int count; for (count = 0; count < OVERLAPCANONICAL.length; count++) { if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], true, true)) { errln("Error at overlap test number %d" + count); } } for (count = 0; count < NONOVERLAP.length; count++) { if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) { errln("Error at non overlap test number %d" + count); } } for (count = 0; count < OVERLAPCANONICAL.length && count < NONOVERLAPCANONICAL.length; count++) { SearchData search = 
OVERLAPCANONICAL[count]; RuleBasedCollator collator = getCollator(search.collator); StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), collator, null); strsrch.setCanonical(true); strsrch.setOverlapping(true); if (strsrch.isOverlapping() != true) { errln("Error setting overlap option"); } if (!assertEqualWithStringSearch(strsrch, search)) { strsrch = null; return; } search = NONOVERLAPCANONICAL[count]; strsrch.setOverlapping(false); if (strsrch.isOverlapping() != false) { errln("Error setting overlap option"); } strsrch.reset(); if (!assertEqualWithStringSearch(strsrch, search)) { strsrch = null; errln("Error at test number %d" + count); } } } @Test public void TestPattern() { m_en_us_.setStrength(PATTERN[0].strength); StringSearch strsrch = new StringSearch(PATTERN[0].pattern, new StringCharacterIterator(PATTERN[0].text), m_en_us_, null); if (strsrch.getPattern() != PATTERN[0].pattern) { errln("Error setting pattern"); } if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) { m_en_us_.setStrength(TERTIARY); if (strsrch != null) { strsrch = null; } return; } strsrch.setPattern(PATTERN[1].pattern); if (PATTERN[1].pattern != strsrch.getPattern()) { errln("Error setting pattern"); m_en_us_.setStrength(TERTIARY); if (strsrch != null) { strsrch = null; } return; } strsrch.reset(); if (!assertEqualWithStringSearch(strsrch, PATTERN[1])) { m_en_us_.setStrength(TERTIARY); if (strsrch != null) { strsrch = null; } return; } strsrch.setPattern(PATTERN[0].pattern); if (PATTERN[0].pattern != strsrch.getPattern()) { errln("Error setting pattern"); m_en_us_.setStrength(TERTIARY); if (strsrch != null) { strsrch = null; } return; } strsrch.reset(); if (!assertEqualWithStringSearch(strsrch, PATTERN[0])) { m_en_us_.setStrength(TERTIARY); if (strsrch != null) { strsrch = null; } return; } /* enormous pattern size to see if this crashes */ String pattern = ""; for (int templength = 0; templength != 512; templength ++) { pattern += 0x61; } 
try{ strsrch.setPattern(pattern); }catch(Exception e) { errln("Error setting pattern with size 512"); } m_en_us_.setStrength(TERTIARY); if (strsrch != null) { strsrch = null; } } @Test public void TestPatternCanonical() { //StringCharacterIterator text = new StringCharacterIterator(PATTERNCANONICAL[0].text); m_en_us_.setStrength(PATTERNCANONICAL[0].strength); StringSearch strsrch = new StringSearch(PATTERNCANONICAL[0].pattern, new StringCharacterIterator(PATTERNCANONICAL[0].text), m_en_us_, null); strsrch.setCanonical(true); if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) { errln("Error setting pattern"); } if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) { m_en_us_.setStrength(TERTIARY); strsrch = null; return; } strsrch.setPattern(PATTERNCANONICAL[1].pattern); if (PATTERNCANONICAL[1].pattern != strsrch.getPattern()) { errln("Error setting pattern"); m_en_us_.setStrength(TERTIARY); strsrch = null; return; } strsrch.reset(); strsrch.setCanonical(true); if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[1])) { m_en_us_.setStrength(TERTIARY); strsrch = null; return; } strsrch.setPattern(PATTERNCANONICAL[0].pattern); if (PATTERNCANONICAL[0].pattern != strsrch.getPattern()) { errln("Error setting pattern"); m_en_us_.setStrength(TERTIARY); strsrch = null; return; } strsrch.reset(); strsrch.setCanonical(true); if (!assertEqualWithStringSearch(strsrch, PATTERNCANONICAL[0])) { m_en_us_.setStrength(TERTIARY); strsrch = null; return; } } @Test public void TestReset() { StringCharacterIterator text = new StringCharacterIterator("fish fish"); String pattern = "s"; StringSearch strsrch = new StringSearch(pattern, text, m_en_us_, null); strsrch.setOverlapping(true); strsrch.setCanonical(true); strsrch.setIndex(9); strsrch.reset(); if (strsrch.isCanonical() || strsrch.isOverlapping() || strsrch.getIndex() != 0 || strsrch.getMatchLength() != 0 || strsrch.getMatchStart() != SearchIterator.DONE) { errln("Error resetting string search"); } 
strsrch.previous(); if (strsrch.getMatchStart() != 7 || strsrch.getMatchLength() != 1) { errln("Error resetting string search\n"); } } @Test public void TestSetMatch() { for (int count = 0; count < MATCH.length; count++) { SearchData search = MATCH[count]; StringSearch strsrch = new StringSearch(search.pattern, new StringCharacterIterator(search.text), m_en_us_, null); int size = 0; while (search.offset[size] != -1) { size ++; } if (strsrch.first() != search.offset[0]) { errln("Error getting first match"); } if (strsrch.last() != search.offset[size -1]) { errln("Error getting last match"); } int index = 0; while (index < size) { if (index + 2 < size) { if (strsrch.following(search.offset[index + 2] - 1) != search.offset[index + 2]) { errln("Error getting following match at index " + (search.offset[index + 2]-1)); } } if (index + 1 < size) { if (strsrch.preceding(search.offset[index + 1] + search.size[index + 1] + 1) != search.offset[index + 1]) { errln("Error getting preceeding match at index " + (search.offset[index + 1] + 1)); } } index += 2; } if (strsrch.following(search.text.length()) != SearchIterator.DONE) { errln("Error expecting out of bounds match"); } if (strsrch.preceding(0) != SearchIterator.DONE) { errln("Error expecting out of bounds match"); } } } @Test public void TestStrength() { for (int count = 0; count < STRENGTH.length; count++) { if (!assertEqual(STRENGTH[count])) { errln("Error at test number " + count); } } } @Test public void TestStrengthCanonical() { for (int count = 0; count < STRENGTHCANONICAL.length; count++) { if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) { errln("Error at test number" + count); } } } @Test public void TestSupplementary() { for (int count = 0; count < SUPPLEMENTARY.length; count++) { if (!assertEqual(SUPPLEMENTARY[count])) { errln("Error at test number " + count); } } } @Test public void TestSupplementaryCanonical() { for (int count = 0; count < SUPPLEMENTARYCANONICAL.length; count++) { if 
(!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) { errln("Error at test number" + count); } } } @Test public void TestText() { SearchData TEXT[] = { SD("the foxy brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(4, 15, -1), IA(3, 3)), SD("the quick brown fox", "fox", null, TERTIARY, STANDARD_ELEMENT_COMPARISON, null, IA(16, -1), IA(3)) }; StringCharacterIterator t = new StringCharacterIterator(TEXT[0].text); StringSearch strsrch = new StringSearch(TEXT[0].pattern, t, m_en_us_, null); if (!t.equals(strsrch.getTarget())) { errln("Error setting text"); } if (!assertEqualWithStringSearch(strsrch, TEXT[0])) { errln("Error at assertEqualWithStringSearch"); return; } t = new StringCharacterIterator(TEXT[1].text); strsrch.setTarget(t); if (!t.equals(strsrch.getTarget())) { errln("Error setting text"); return; } if (!assertEqualWithStringSearch(strsrch, TEXT[1])) { errln("Error at assertEqualWithStringSearch"); return; } } @Test public void TestTextCanonical() { StringCharacterIterator t = new StringCharacterIterator(TEXTCANONICAL[0].text); StringSearch strsrch = new StringSearch(TEXTCANONICAL[0].pattern, t, m_en_us_, null); strsrch.setCanonical(true); if (!t.equals(strsrch.getTarget())) { errln("Error setting text"); } if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) { strsrch = null; return; } t = new StringCharacterIterator(TEXTCANONICAL[1].text); strsrch.setTarget(t); if (!t.equals(strsrch.getTarget())) { errln("Error setting text"); strsrch = null; return; } if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[1])) { strsrch = null; return; } t = new StringCharacterIterator(TEXTCANONICAL[0].text); strsrch.setTarget(t); if (!t.equals(strsrch.getTarget())) { errln("Error setting text"); strsrch = null; return; } if (!assertEqualWithStringSearch(strsrch, TEXTCANONICAL[0])) { errln("Error at assertEqualWithStringSearch"); strsrch = null; return; } } @Test public void TestVariable() { m_en_us_.setAlternateHandlingShifted(true); 
for (int count = 0; count < VARIABLE.length; count++) { // logln("variable" + count); if (!assertEqual(VARIABLE[count])) { errln("Error at test number " + count); } } m_en_us_.setAlternateHandlingShifted(false); } @Test public void TestVariableCanonical() { m_en_us_.setAlternateHandlingShifted(true); for (int count = 0; count < VARIABLE.length; count++) { // logln("variable " + count); if (!assertCanonicalEqual(VARIABLE[count])) { errln("Error at test number " + count); } } m_en_us_.setAlternateHandlingShifted(false); } @Test public void TestSubClass() { class TestSearch extends SearchIterator { String pattern; String text; TestSearch(StringCharacterIterator target, BreakIterator breaker, String pattern) { super(target, breaker); this.pattern = pattern; StringBuffer buffer = new StringBuffer(); while (targetText.getIndex() != targetText.getEndIndex()) { buffer.append(targetText.current()); targetText.next(); } text = buffer.toString(); targetText.setIndex(targetText.getBeginIndex()); } @Override protected int handleNext(int start) { int match = text.indexOf(pattern, start); if (match < 0) { targetText.last(); return DONE; } targetText.setIndex(match); setMatchLength(pattern.length()); return match; } @Override protected int handlePrevious(int start) { int match = text.lastIndexOf(pattern, start - 1); if (match < 0) { targetText.setIndex(0); return DONE; } targetText.setIndex(match); setMatchLength(pattern.length()); return match; } @Override public int getIndex() { int result = targetText.getIndex(); if (result < 0 || result >= text.length()) { return DONE; } return result; } } TestSearch search = new TestSearch( new StringCharacterIterator("abc abcd abc"), null, "abc"); int expected[] = {0, 4, 9}; for (int i = 0; i < expected.length; i ++) { if (search.next() != expected[i]) { errln("Error getting next match"); } if (search.getMatchLength() != search.pattern.length()) { errln("Error getting next match length"); } } if (search.next() != SearchIterator.DONE) { 
errln("Error should have reached the end of the iteration"); } for (int i = expected.length - 1; i >= 0; i --) { if (search.previous() != expected[i]) { errln("Error getting next match"); } if (search.getMatchLength() != search.pattern.length()) { errln("Error getting next match length"); } } if (search.previous() != SearchIterator.DONE) { errln("Error should have reached the start of the iteration"); } } //Test for ticket 5024 @Test public void TestDiactricMatch() { String pattern = "pattern"; String text = "text"; StringSearch strsrch = null; try { strsrch = new StringSearch(pattern, text); } catch (Exception e) { errln("Error opening string search "); return; } for (int count = 0; count < DIACTRICMATCH.length; count++) { strsrch.setCollator(getCollator(DIACTRICMATCH[count].collator)); strsrch.getCollator().setStrength(DIACTRICMATCH[count].strength); strsrch.setBreakIterator(getBreakIterator(DIACTRICMATCH[count].breaker)); strsrch.reset(); text = DIACTRICMATCH[count].text; pattern = DIACTRICMATCH[count].pattern; strsrch.setTarget(new StringCharacterIterator(text)); strsrch.setPattern(pattern); if (!assertEqualWithStringSearch(strsrch, DIACTRICMATCH[count])) { errln("Error at test number " + count); } } } @Test public void TestUsingSearchCollator() { String scKoText = " " + /*01*/ "\uAC00 " + // simple LV Hangul /*03*/ "\uAC01 " + // simple LVT Hangul /*05*/ "\uAC0F " + // LVTT, last jamo expands for search /*07*/ "\uAFFF " + // LLVVVTT, every jamo expands for search /*09*/ "\u1100\u1161\u11A8 " + // 0xAC01 as conjoining jamo /*13*/ "\u1100\u1161\u1100 " + // 0xAC01 as basic conjoining jamo (per search rules) /*17*/ "\u3131\u314F\u3131 " + // 0xAC01 as compatibility jamo /*21*/ "\u1100\u1161\u11B6 " + // 0xAC0F as conjoining jamo; last expands for search /*25*/ "\u1100\u1161\u1105\u1112 " + // 0xAC0F as basic conjoining jamo; last expands for search /*30*/ "\u1101\u1170\u11B6 " + // 0xAFFF as conjoining jamo; all expand for search /*34*/ "\u00E6 " + // small 
letter ae, expands /*36*/ "\u1E4D " + // small letter o with tilde and acute, decomposes ""; String scKoPat0 = "\uAC01"; String scKoPat1 = "\u1100\u1161\u11A8"; // 0xAC01 as conjoining jamo String scKoPat2 = "\uAC0F"; String scKoPat3 = "\u1100\u1161\u1105\u1112"; // 0xAC0F as basic conjoining jamo String scKoPat4 = "\uAFFF"; String scKoPat5 = "\u1101\u1170\u11B6"; // 0xAFFF as conjoining jamo int[] scKoSrchOff01 = { 3, 9, 13 }; int[] scKoSrchOff23 = { 5, 21, 25 }; int[] scKoSrchOff45 = { 7, 30 }; int[] scKoStndOff01 = { 3, 9 }; int[] scKoStndOff2 = { 5, 21 }; int[] scKoStndOff3 = { 25 }; int[] scKoStndOff45 = { 7, 30 }; class PatternAndOffsets { private String pattern; private int[] offsets; PatternAndOffsets(String pat, int[] offs) { pattern = pat; offsets = offs; } public String getPattern() { return pattern; } public int[] getOffsets() { return offsets; } } final PatternAndOffsets[] scKoSrchPatternsOffsets = { new PatternAndOffsets( scKoPat0, scKoSrchOff01 ), new PatternAndOffsets( scKoPat1, scKoSrchOff01 ), new PatternAndOffsets( scKoPat2, scKoSrchOff23 ), new PatternAndOffsets( scKoPat3, scKoSrchOff23 ), new PatternAndOffsets( scKoPat4, scKoSrchOff45 ), new PatternAndOffsets( scKoPat5, scKoSrchOff45 ), }; final PatternAndOffsets[] scKoStndPatternsOffsets = { new PatternAndOffsets( scKoPat0, scKoStndOff01 ), new PatternAndOffsets( scKoPat1, scKoStndOff01 ), new PatternAndOffsets( scKoPat2, scKoStndOff2 ), new PatternAndOffsets( scKoPat3, scKoStndOff3 ), new PatternAndOffsets( scKoPat4, scKoStndOff45 ), new PatternAndOffsets( scKoPat5, scKoStndOff45 ), }; class TUSCItem { private String localeString; private String text; private PatternAndOffsets[] patternsAndOffsets; TUSCItem(String locStr, String txt, PatternAndOffsets[] patsAndOffs) { localeString = locStr; text = txt; patternsAndOffsets = patsAndOffs; } public String getLocaleString() { return localeString; } public String getText() { return text; } public PatternAndOffsets[] getPatternsAndOffsets() { return 
patternsAndOffsets; } } final TUSCItem[] tuscItems = { new TUSCItem( "root", scKoText, scKoStndPatternsOffsets ), new TUSCItem( "root@collation=search", scKoText, scKoSrchPatternsOffsets ), new TUSCItem( "ko@collation=search", scKoText, scKoSrchPatternsOffsets ), }; String dummyPat = "a"; for (TUSCItem tuscItem: tuscItems) { String localeString = tuscItem.getLocaleString(); ULocale uloc = new ULocale(localeString); RuleBasedCollator col = null; try { col = (RuleBasedCollator)Collator.getInstance(uloc); } catch (Exception e) { errln("Error: in locale " + localeString + ", err in Collator.getInstance"); continue; } StringCharacterIterator ci = new StringCharacterIterator(tuscItem.getText()); StringSearch srch = new StringSearch(dummyPat, ci, col); for ( PatternAndOffsets patternAndOffsets: tuscItem.getPatternsAndOffsets() ) { srch.setPattern(patternAndOffsets.getPattern()); int[] offsets = patternAndOffsets.getOffsets(); int ioff, noff = offsets.length; int offset; srch.reset(); ioff = 0; while (true) { offset = srch.next(); if (offset == SearchIterator.DONE) { break; } if ( ioff < noff ) { if ( offset != offsets[ioff] ) { errln("Error: in locale " + localeString + ", expected SearchIterator.next() " + offsets[ioff] + ", got " + offset); //ioff = noff; //break; } ioff++; } else { errln("Error: in locale " + localeString + ", SearchIterator.next() returned more matches than expected"); } } if ( ioff < noff ) { errln("Error: in locale " + localeString + ", SearchIterator.next() returned fewer matches than expected"); } srch.reset(); ioff = noff; while (true) { offset = srch.previous(); if (offset == SearchIterator.DONE) { break; } if ( ioff > 0 ) { ioff--; if ( offset != offsets[ioff] ) { errln("Error: in locale " + localeString + ", expected SearchIterator.previous() " + offsets[ioff] + ", got " + offset); //ioff = 0; // break; } } else { errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned more matches than expected"); } } if ( 
ioff > 0 ) { errln("Error: in locale " + localeString + ", expected SearchIterator.previous() returned fewer matches than expected"); } } } } @Test public void TestIndicPrefixMatch() { for (int count = 0; count < INDICPREFIXMATCH.length; count++) { if (!assertEqual(INDICPREFIXMATCH[count])) { errln("Error at test number" + count); } } } // Test case for ticket#12555 @Test public void TestLongPattern() { StringBuilder pattern = new StringBuilder(); for (int i = 0; i < 255; i++) { pattern.append('a'); } // appends a character producing multiple ce32 at // index 256. pattern.append('á'); CharacterIterator target = new StringCharacterIterator("not important"); try { StringSearch ss = new StringSearch(pattern.toString(), target, Locale.ENGLISH); assertNotNull("Non-null StringSearch instance", ss); } catch (Exception e) { errln("Error initializing a new StringSearch object"); } } }