/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.dev.test.normalizer;

import java.util.Collection;
import java.util.Iterator;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.text.CanonicalIterator;
import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UTF16;


// TODO: fit into test framework

/**
 * Tests for {@link CanonicalIterator}: an exhaustive per-code-point check, a
 * timing run, and a check of a few hand-written expected result sets.
 */
public class TestCanonicalIterator extends TestFmwk {

    /** When true, {@link #getReadable(Object)} emits an extra leading field. */
    static final boolean SHOW_NAMES = false;

    public static void main(String[] args) throws Exception {
        new TestCanonicalIterator().run(args);
    }

    /**
     * Sample data for {@link #TestBasic()}. Each row is
     * {source, expected results}, where the expected results are the sorted
     * iterator output joined with ", ".
     */
    static final String testArray[][] = {
        {"\u00C5d\u0307\u0327", "A\u030Ad\u0307\u0327, A\u030Ad\u0327\u0307, A\u030A\u1E0B\u0327, "
            + "A\u030A\u1E11\u0307, \u00C5d\u0307\u0327, \u00C5d\u0327\u0307, "
            + "\u00C5\u1E0B\u0327, \u00C5\u1E11\u0307, \u212Bd\u0307\u0327, "
            + "\u212Bd\u0327\u0307, \u212B\u1E0B\u0327, \u212B\u1E11\u0307"},
        {"\u010d\u017E", "c\u030Cz\u030C, c\u030C\u017E, \u010Dz\u030C, \u010D\u017E"},
        {"x\u0307\u0327", "x\u0307\u0327, x\u0327\u0307, \u1E8B\u0327"},
    };

    /**
     * Runs {@link #characterTest(String, int, CanonicalIterator)} for every
     * assigned, non-private-use, non-surrogate code point, both on its own and
     * followed by U+0345.
     */
    public void TestExhaustive() {
        int counter = 0;
        CanonicalIterator it = new CanonicalIterator("");
        /*
        CanonicalIterator slowIt = new CanonicalIterator("");
        slowIt.SKIP_ZEROS = false;
        */
        //Transliterator name = Transliterator.getInstance("[^\\u0020-\\u007F] name");
        //Set itSet = new TreeSet();
        //Set slowItSet = new TreeSet();

        // Inclusive upper bound so that the last code point is visited too.
        for (int i = 0; i <= 0x10FFFF; ++i) {

            // skip characters we know don't have decomps
            // UCharacter.getType() returns UCharacterCategory values, which are
            // NOT numerically compatible with java.lang.Character's constants
            // (java.lang.Character skips the value 17), so compare against the
            // ICU constants.
            int type = UCharacter.getType(i);
            if (type == UCharacterCategory.UNASSIGNED
                    || type == UCharacterCategory.PRIVATE_USE
                    || type == UCharacterCategory.SURROGATE) {
                continue;
            }

            if ((++counter % 5000) == 0) {
                logln("Testing " + Utility.hex(i, 0));
            }

            String s = UTF16.valueOf(i);
            characterTest(s, i, it);

            characterTest(s + "\u0345", i, it);
        }
    }

    /**
     * Times repeated full iterations over a fixed sample string. Does nothing
     * unless the test is running in verbose mode.
     *
     * @return the accumulated length of all items produced (0 when skipped);
     *         returned only so the loop body has an observable result
     */
    public int TestSpeed() {
        // skip unless verbose
        if (!isVerbose()) {
            return 0;
        }

        String s = "\uAC01\u0345";

        CanonicalIterator it = new CanonicalIterator(s);
        double start, end;
        int x = 0; // just to keep code from optimizing away.
        int iterations = 10000;
        // Stays 0 while the slow-iterator comparison below is disabled.
        double slowDelta = 0;

        /*
        CanonicalIterator slowIt = new CanonicalIterator(s);
        slowIt.SKIP_ZEROS = false;

        start = System.currentTimeMillis();
        for (int i = 0; i < iterations; ++i) {
            slowIt.setSource(s);
            while (true) {
                String item = slowIt.next();
                if (item == null) break;
                x += item.length();
            }
        }
        end = System.currentTimeMillis();
        double slowDelta = (end-start) / iterations;
        logln("Slow iteration: " + slowDelta);
        */

        start = System.currentTimeMillis();
        for (int i = 0; i < iterations; ++i) {
            it.setSource(s);
            while (true) {
                String item = it.next();
                if (item == null) {
                    break;
                }
                x += item.length();
            }
        }
        end = System.currentTimeMillis();
        double fastDelta = (end - start) / iterations;
        logln("Fast iteration: " + fastDelta + (slowDelta != 0 ? ", " + (fastDelta / slowDelta) : ""));


        return x;
    }

    /**
     * Checks {@code CanonicalIterator.permute}, then for each row of
     * {@link #testArray} checks the sorted iterator output, that
     * {@code reset()} restarts at the first result, and that
     * {@code getSource()} equals the NFD form of the input.
     */
    public void TestBasic() {
//      This is not interesting anymore as the data is already built
//      beforehand

//        check build
//        UnicodeSet ss = CanonicalIterator.getSafeStart();
//        logln("Safe Start: " + ss.toPattern(true));
//        ss = CanonicalIterator.getStarts('a');
//        expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
//            new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
//            + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
//                );

        // check permute
        // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!

        Set results = new TreeSet();
        CanonicalIterator.permute("ABC", false, results);
        expectEqual("Simple permutation ", "", collectionToString(results), "ABC, ACB, BAC, BCA, CAB, CBA");

        // try samples
        SortedSet set = new TreeSet();
        for (int i = 0; i < testArray.length; ++i) {
            //logln("Results for: " + name.transliterate(testArray[i]));
            CanonicalIterator it = new CanonicalIterator(testArray[i][0]);
            // int counter = 0;
            set.clear();
            String first = null;
            while (true) {
                String result = it.next();
                if (first == null) {
                    first = result;
                }
                if (result == null) {
                    break;
                }
                set.add(result); // sort them
                //logln(++counter + ": " + hex.transliterate(result));
                //logln(" = " + name.transliterate(result));
            }
            expectEqual(i + ": ", testArray[i][0], collectionToString(set), testArray[i][1]);

            // After reset() the iterator must start over with the same first
            // result. Compare null-safely so that an iterator that yields
            // nothing is reported as a failure rather than throwing.
            it.reset();
            String afterReset = it.next();
            if (first == null || !first.equals(afterReset)) {
                errln("CanonicalIterator.reset() failed");
            }
            if (!it.getSource().equals(Normalizer.normalize(testArray[i][0], Normalizer.NFD))) {
                errln("CanonicalIterator.getSource() does not return NFD of input source");
            }
        }
    }

    /**
     * Reports an error if {@code a} and {@code b} are not equal, otherwise
     * logs the successful check. Both values are printed either way.
     *
     * @param message prefix for the report line
     * @param item the input the check relates to, appended to {@code message}
     * @param a actual value; must not be null
     * @param b expected value
     */
    public void expectEqual(String message, String item, Object a, Object b) {
        if (!a.equals(b)) {
            errln("FAIL: " + message + getReadable(item));
            errln("\t" + getReadable(a));
            errln("\t" + getReadable(b));
        } else {
            logln("Checked: " + message + getReadable(item));
            logln("\t" + getReadable(a));
            logln("\t" + getReadable(b));
        }
    }

    //Transliterator name = null;
    //Transliterator hex = null;

    /**
     * Formats a value for log output.
     *
     * @param obj value to format; may be null
     * @return {@code "null"} for null, the empty string when
     *         {@code obj.toString()} is empty, otherwise the {@code hex(...)}
     *         form of the string wrapped in square brackets
     */
    public String getReadable(Object obj) {
        if (obj == null) {
            return "null";
        }
        String s = obj.toString();
        if (s.length() == 0) {
            return "";
        }
        // set up for readable display
        //if (name == null) name = Transliterator.getInstance("[^\\ -\\u007F] name");
        //if (hex == null) hex = Transliterator.getInstance("[^\\ -\\u007F] hex");
        // NOTE(review): with the name transliterator disabled, the SHOW_NAMES
        // branch just repeats the hex form.
        return "[" + (SHOW_NAMES ? hex(s) + "; " : "") + hex(s) + "]";
    }

    /**
     * Iterates over all results for {@code s} and reports an error unless the
     * results include {@code s} itself, its decomposition and its composition.
     * Strings that equal both their decomposition and their composition are
     * skipped.
     *
     * @param s the string to test
     * @param ch the code point {@code s} was built from; only used to decide
     *           whether to log the individual results
     * @param it iterator to reuse; its source is replaced with {@code s}
     */
    public void characterTest(String s, int ch, CanonicalIterator it)
    {
        // NOTE(review): mixedCounter is a local that is still 0 throughout the
        // loop below, so the "(mixedCounter & 0x7F) == 0" throttle is always
        // true and the blank separator line is logged once per call. It looks
        // as if this was meant to persist across calls - confirm before changing.
        int mixedCounter = 0;
        int lastMixedCounter = -1;
        boolean gotDecomp = false;
        boolean gotComp = false;
        boolean gotSource = false;
        String decomp = Normalizer.decompose(s, false);
        String comp = Normalizer.compose(s, false);

        // skip characters that don't have either decomp.
        // need quick test for this!
        if (s.equals(decomp) && s.equals(comp)) {
            return;
        }

        it.setSource(s);

        while (true) {
            String item = it.next();
            if (item == null) {
                break;
            }
            if (item.equals(s)) {
                gotSource = true;
            }
            if (item.equals(decomp)) {
                gotDecomp = true;
            }
            if (item.equals(comp)) {
                gotComp = true;
            }
            // NOTE(review): the lower bound is 0xAD00 while the upper bound is
            // computed from 0xAC00; presumably this suppresses logging for most
            // of the Hangul syllable range - confirm whether 0xAD00 is intended.
            if ((mixedCounter & 0x7F) == 0 && (ch < 0xAD00 || ch > 0xAC00 + 11172)) {
                if (lastMixedCounter != mixedCounter) {
                    logln("");
                    lastMixedCounter = mixedCounter;
                }
                logln("\t" + mixedCounter + "\t" + hex(item)
                    + (item.equals(s) ? "\t(*original*)" : "")
                    + (item.equals(decomp) ? "\t(*decomp*)" : "")
                    + (item.equals(comp) ? "\t(*comp*)" : "")
                );
            }

        }

        // check that zeros optimization doesn't mess up.
        /*
        if (true) {
            it.reset();
            itSet.clear();
            while (true) {
                String item = it.next();
                if (item == null) break;
                itSet.add(item);
            }
            slowIt.setSource(s);
            slowItSet.clear();
            while (true) {
                String item = slowIt.next();
                if (item == null) break;
                slowItSet.add(item);
            }
            if (!itSet.equals(slowItSet)) {
                errln("Zero optimization failure with " + getReadable(s));
            }
        }
        */

        mixedCounter++;
        if (!gotSource || !gotDecomp || !gotComp) {
            errln("FAIL CanonicalIterator: " + s + " decomp: " +decomp+" comp: "+comp);
            // Dump everything the iterator produced to help diagnose the failure.
            it.reset();
            for (String item = it.next(); item != null; item = it.next()) {
                err(item + " ");
            }
            errln("");
        }
    }

    /**
     * Joins the string forms of a collection's elements with ", " in
     * iteration order.
     *
     * @param col the collection to render; elements must not be null
     * @return the joined string, empty for an empty collection
     */
    static String collectionToString(Collection col) {
        StringBuffer result = new StringBuffer();
        Iterator it = col.iterator();
        while (it.hasNext()) {
            if (result.length() != 0) {
                result.append(", ");
            }
            result.append(it.next().toString());
        }
        return result.toString();
    }
}