/* ******************************************************************************* * Copyright (C) 2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * created on: 2011jan10 * created by: Markus W. Scherer * ported from ICU4C ucharstrietest.h/.cpp */ package com.ibm.icu.dev.test.util; import java.util.NoSuchElementException; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.BytesTrie; import com.ibm.icu.util.CharsTrie; import com.ibm.icu.util.CharsTrieBuilder; import com.ibm.icu.util.StringTrieBuilder; public class CharsTrieTest extends TestFmwk { public static void main(String[] args) throws Exception { new CharsTrieTest().run(args); } public CharsTrieTest() {} // All test functions have a TestNN prefix where NN is a double-digit number. // This is so that when tests are run in sorted order // the simpler ones are run first. // If there is a problem, the simpler ones are easier to step through. public void Test00Builder() { builder_.clear(); try { builder_.build(StringTrieBuilder.Option.FAST); errln("CharsTrieBuilder().build() did not throw IndexOutOfBoundsException"); return; } catch(IndexOutOfBoundsException e) { // good } try { builder_.add("=", 0).add("=", 1); errln("CharsTrieBuilder.add() did not detect duplicates"); return; } catch(IllegalArgumentException e) { // good } } private static final class StringAndValue { public StringAndValue(String str, int val) { s=str; value=val; } public String s; public int value; } // Note: C++ StringAndValue initializers converted to Java syntax // with Eclipse Find/Replace regular expressions: // Find: \{ (".*", [-0-9xa-fA-F]+) \} // Replace with: new StringAndValue($1) public void Test10Empty() { final StringAndValue[] data={ new StringAndValue("", 0) }; checkData(data); } public void Test11_a() { final StringAndValue[] data={ new StringAndValue("a", 1) }; checkData(data); } public void Test12_a_ab() { final StringAndValue[] data={ new StringAndValue("a", 1), new StringAndValue("ab", 100) }; checkData(data); } public void Test20ShortestBranch() { final StringAndValue[] data={ new StringAndValue("a", 1000), new StringAndValue("b", 2000) }; checkData(data); } public void Test21Branches() { final StringAndValue[] data={ new StringAndValue("a", 0x10), new StringAndValue("cc", 0x40), new StringAndValue("e", 0x100), new StringAndValue("ggg", 0x400), new StringAndValue("i", 0x1000), new StringAndValue("kkkk", 0x4000), new StringAndValue("n", 0x10000), new StringAndValue("ppppp", 0x40000), new StringAndValue("r", 0x100000), new StringAndValue("sss", 0x200000), new StringAndValue("t", 0x400000), new StringAndValue("uu", 0x800000), new StringAndValue("vv", 0x7fffffff), new StringAndValue("zz", 0x80000000) }; for(int length=2; length<=data.length; ++length) { logln("TestBranches length="+length); checkData(data, length); } } public void Test22LongSequence() { final StringAndValue[] data={ new StringAndValue("a", -1), // sequence of linear-match nodes new StringAndValue("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2), // more than 256 units new StringAndValue( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3) }; checkData(data); } public void Test23LongBranch() { // Split-branch and interesting compact-integer values. final StringAndValue[] data={ new StringAndValue("a", -2), new StringAndValue("b", -1), new StringAndValue("c", 0), new StringAndValue("d2", 1), new StringAndValue("f", 0x3f), new StringAndValue("g", 0x40), new StringAndValue("h", 0x41), new StringAndValue("j23", 0x1900), new StringAndValue("j24", 0x19ff), new StringAndValue("j25", 0x1a00), new StringAndValue("k2", 0x1a80), new StringAndValue("k3", 0x1aff), new StringAndValue("l234567890", 0x1b00), new StringAndValue("l234567890123", 0x1b01), new StringAndValue("nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff), new StringAndValue("oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000), new StringAndValue("pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000), new StringAndValue("r", 0x333333), new StringAndValue("s2345", 0x4444444), new StringAndValue("t234567890", 0x77777777), new StringAndValue("z", 0x80000001) }; checkData(data); } public void Test24ValuesForState() { // Check that saveState() and resetToState() interact properly // with next() and current(). final StringAndValue[] data={ new StringAndValue("a", -1), new StringAndValue("ab", -2), new StringAndValue("abc", -3), new StringAndValue("abcd", -4), new StringAndValue("abcde", -5), new StringAndValue("abcdef", -6) }; checkData(data); } public void Test30Compact() { // Duplicate trailing strings and values provide opportunities for compacting. final StringAndValue[] data={ new StringAndValue("+", 0), new StringAndValue("+august", 8), new StringAndValue("+december", 12), new StringAndValue("+july", 7), new StringAndValue("+june", 6), new StringAndValue("+november", 11), new StringAndValue("+october", 10), new StringAndValue("+september", 9), new StringAndValue("-", 0), new StringAndValue("-august", 8), new StringAndValue("-december", 12), new StringAndValue("-july", 7), new StringAndValue("-june", 6), new StringAndValue("-november", 11), new StringAndValue("-october", 10), new StringAndValue("-september", 9), // The l+n branch (with its sub-nodes) is a duplicate but will be written // both times because each time it follows a different linear-match node. new StringAndValue("xjuly", 7), new StringAndValue("xjune", 6) }; checkData(data); } public void Test31FirstForCodePoint() { final StringAndValue[] data={ new StringAndValue("a", 1), new StringAndValue("a\ud800", 2), new StringAndValue("a\ud800\udc00", 3), // "a\\U00010000" new StringAndValue("\ud840", 4), new StringAndValue("\ud840\udc00\udbff", 5), // "\\U00020000\udbff" new StringAndValue("\ud840\udc00\udbff\udfff", 6), // "\\U00020000\\U0010ffff" new StringAndValue("\ud840\udc00\udbff\udfffz", 7), // "\\U00020000\\U0010ffffz" new StringAndValue("\ud900\udc00xy", 8), // "\\U00050000xy" new StringAndValue("\ud900\udc00xyz", 9) // "\\U00050000xyz" }; checkData(data); } public void Test32NextForCodePoint() { final StringAndValue[] data={ // "\u4dff\\U00010000\u9999\\U00020000\udfff\\U0010ffff" new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc00\udfff\udbff\udfff", 2000000000), // "\u4dff\\U00010000\u9999\\U00020002" new StringAndValue("\u4dff\ud800\udc00\u9999\ud840\udc02", 44444), // "\u4dff\\U000103ff" new StringAndValue("\u4dff\ud800\udfff", 99999) }; CharsTrie trie=buildTrie(data, data.length, StringTrieBuilder.Option.FAST); BytesTrie.Result result; if( (result=trie.nextForCodePoint(0x4dff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x10000))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x9999))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x20000))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0xdfff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x10ffff))!=BytesTrie.Result.FINAL_VALUE || result!=trie.current() || trie.getValue()!=2000000000 ) { errln("CharsTrie.nextForCodePoint() fails for "+data[0].s); } if( (result=trie.firstForCodePoint(0x4dff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x10000))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x9999))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x20002))!=BytesTrie.Result.FINAL_VALUE || result!=trie.current() || trie.getValue()!=44444 ) { errln("CharsTrie.nextForCodePoint() fails for "+data[1].s); } if( (result=trie.reset().nextForCodePoint(0x4dff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x10000))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x9999))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x20222))!=BytesTrie.Result.NO_MATCH || result!=trie.current() // no match for trail surrogate ) { errln("CharsTrie.nextForCodePoint() fails for \u4dff\\U00010000\u9999\\U00020222"); } if( (result=trie.reset().nextForCodePoint(0x4dff))!=BytesTrie.Result.NO_VALUE || result!=trie.current() || (result=trie.nextForCodePoint(0x103ff))!=BytesTrie.Result.FINAL_VALUE || result!=trie.current() || trie.getValue()!=99999 ) { errln("CharsTrie.nextForCodePoint() fails for "+data[2].s); } } // Generate (string, value) pairs. // The first string (before next()) will be empty. private static final class Generator { public Generator() { value=4711; num=0; } public void next() { char c; s.setLength(0); s.append(c=(char)(value>>16)); s.append((char)(value>>4)); if((value&1)!=0) { s.append((char)value); } set.add(c); value+=((value>>5)&0x7ff)*3+1; ++num; } public CharSequence getString() { return s; } public int getValue() { return value; } public int countUniqueFirstChars() { return set.size(); } public int getIndex() { return num; } private StringBuilder s=new StringBuilder(); private UnicodeSet set=new UnicodeSet(); private int value; private int num; }; private CharsTrie buildLargeTrie(int numUniqueFirst) { Generator gen=new Generator(); builder_.clear(); while(gen.countUniqueFirstChars()1 ? expectedString.charAt(1) : 0; BytesTrie.Result firstResult=trie.first(c); int firstValue=firstResult.hasValue() ? trie.getValue() : -1; BytesTrie.Result nextResult=trie.next(nextCp); if(firstResult!=trie.reset().next(c) || firstResult!=trie.current() || firstValue!=(firstResult.hasValue() ? trie.getValue() : -1) || nextResult!=trie.next(nextCp) ) { errln(String.format("trie.first(U+%04X)!=trie.reset().next(same) for %s", c, data[i].s)); } c=expectedString.codePointAt(0); int cLength=Character.charCount(c); nextCp=expectedString.length()>cLength ? expectedString.codePointAt(cLength) : 0; firstResult=trie.firstForCodePoint(c); firstValue=firstResult.hasValue() ? trie.getValue() : -1; nextResult=trie.nextForCodePoint(nextCp); if(firstResult!=trie.reset().nextForCodePoint(c) || firstResult!=trie.current() || firstValue!=(firstResult.hasValue() ? trie.getValue() : -1) || nextResult!=trie.nextForCodePoint(nextCp) ) { errln(String.format("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", c, data[i].s)); } } trie.reset(); } private void checkNext(CharsTrie trie, StringAndValue[] data, int dataLength) { CharsTrie.State state=new CharsTrie.State(); for(int i=0; i