1/*
2 *******************************************************************************
3 * Copyright (C) 1996-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7package com.ibm.icu.dev.test.normalizer;
8
9import java.util.Collection;
10import java.util.Iterator;
11import java.util.Set;
12import java.util.SortedSet;
13import java.util.TreeSet;
14
15import com.ibm.icu.dev.test.TestFmwk;
16import com.ibm.icu.impl.Utility;
17import com.ibm.icu.lang.UCharacter;
18import com.ibm.icu.text.CanonicalIterator;
19import com.ibm.icu.text.Normalizer;
20import com.ibm.icu.text.UTF16;
21
22
23// TODO: fit into test framework
24
25public class TestCanonicalIterator extends TestFmwk {
26
27    static final boolean SHOW_NAMES = false;
28
29    public static void main(String[] args) throws Exception {
30        new TestCanonicalIterator().run(args);
31    }
32
33    static final String testArray[][] = {
34        {"\u00C5d\u0307\u0327", "A\u030Ad\u0307\u0327, A\u030Ad\u0327\u0307, A\u030A\u1E0B\u0327, "
35            + "A\u030A\u1E11\u0307, \u00C5d\u0307\u0327, \u00C5d\u0327\u0307, "
36            + "\u00C5\u1E0B\u0327, \u00C5\u1E11\u0307, \u212Bd\u0307\u0327, "
37            + "\u212Bd\u0327\u0307, \u212B\u1E0B\u0327, \u212B\u1E11\u0307"},
38        {"\u010d\u017E", "c\u030Cz\u030C, c\u030C\u017E, \u010Dz\u030C, \u010D\u017E"},
39        {"x\u0307\u0327", "x\u0307\u0327, x\u0327\u0307, \u1E8B\u0327"},
40    };
41
42    public void TestExhaustive() {
43        int counter = 0;
44        CanonicalIterator it = new CanonicalIterator("");
45        /*
46        CanonicalIterator slowIt = new CanonicalIterator("");
47        slowIt.SKIP_ZEROS = false;
48        */
49        //Transliterator name = Transliterator.getInstance("[^\\u0020-\\u007F] name");
50        //Set itSet = new TreeSet();
51        //Set slowItSet = new TreeSet();
52
53
54        for (int i = 0; i < 0x10FFFF; ++i) {
55
56            // skip characters we know don't have decomps
57            int type = UCharacter.getType(i);
58            if (type == Character.UNASSIGNED || type == Character.PRIVATE_USE
59                || type == Character.SURROGATE) continue;
60
61            if ((++counter % 5000) == 0) logln("Testing " + Utility.hex(i,0));
62
63            String s = UTF16.valueOf(i);
64            characterTest(s, i, it);
65
66            characterTest(s + "\u0345", i, it);
67        }
68    }
69
70    public int TestSpeed() {
71         // skip unless verbose
72        if (!isVerbose()) return 0;
73
74           String s = "\uAC01\u0345";
75
76        CanonicalIterator it = new CanonicalIterator(s);
77        double start, end;
78        int x = 0; // just to keep code from optimizing away.
79        int iterations = 10000;
80        double slowDelta = 0;
81
82        /*
83        CanonicalIterator slowIt = new CanonicalIterator(s);
84        slowIt.SKIP_ZEROS = false;
85
86        start = System.currentTimeMillis();
87        for (int i = 0; i < iterations; ++i) {
88            slowIt.setSource(s);
89            while (true) {
90                String item = slowIt.next();
91                if (item == null) break;
92                x += item.length();
93            }
94        }
95        end = System.currentTimeMillis();
96        double slowDelta = (end-start) / iterations;
97        logln("Slow iteration: " + slowDelta);
98        */
99
100        start = System.currentTimeMillis();
101        for (int i = 0; i < iterations; ++i) {
102            it.setSource(s);
103            while (true) {
104                String item = it.next();
105                if (item == null) break;
106                x += item.length();
107            }
108        }
109        end = System.currentTimeMillis();
110        double fastDelta = (end-start) / iterations;
111        logln("Fast iteration: " + fastDelta + (slowDelta != 0 ? ", " + (fastDelta/slowDelta) : ""));
112
113
114        return x;
115    }
116
117    public void TestBasic() {
118//      This is not interesting anymore as the data is already built
119//      beforehand
120
121//        check build
122//        UnicodeSet ss = CanonicalIterator.getSafeStart();
123//        logln("Safe Start: " + ss.toPattern(true));
124//        ss = CanonicalIterator.getStarts('a');
125//        expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
126//            new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
127//            + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
128//                );
129
130        // check permute
131        // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
132
133        Set results = new TreeSet();
134        CanonicalIterator.permute("ABC", false, results);
135        expectEqual("Simple permutation ", "", collectionToString(results), "ABC, ACB, BAC, BCA, CAB, CBA");
136
137        // try samples
138        SortedSet set = new TreeSet();
139        for (int i = 0; i < testArray.length; ++i) {
140            //logln("Results for: " + name.transliterate(testArray[i]));
141            CanonicalIterator it = new CanonicalIterator(testArray[i][0]);
142           // int counter = 0;
143            set.clear();
144            String first = null;
145            while (true) {
146                String result = it.next();
147                if(first==null){
148                    first = result;
149                }
150                if (result == null) break;
151                set.add(result); // sort them
152                //logln(++counter + ": " + hex.transliterate(result));
153                //logln(" = " + name.transliterate(result));
154            }
155            expectEqual(i + ": ", testArray[i][0], collectionToString(set), testArray[i][1]);
156            it.reset();
157            if(!it.next().equals(first)){
158                errln("CanonicalIterator.reset() failed");
159            }
160            if(!it.getSource().equals(Normalizer.normalize(testArray[i][0],Normalizer.NFD))){
161                errln("CanonicalIterator.getSource() does not return NFD of input source");
162            }
163        }
164    }
165
166    public void expectEqual(String message, String item, Object a, Object b) {
167        if (!a.equals(b)) {
168            errln("FAIL: " + message + getReadable(item));
169            errln("\t" + getReadable(a));
170            errln("\t" + getReadable(b));
171        } else {
172            logln("Checked: " + message + getReadable(item));
173            logln("\t" + getReadable(a));
174            logln("\t" + getReadable(b));
175        }
176    }
177
178    //Transliterator name = null;
179    //Transliterator hex = null;
180
181    public String getReadable(Object obj) {
182        if (obj == null) return "null";
183        String s = obj.toString();
184        if (s.length() == 0) return "";
185        // set up for readable display
186        //if (name == null) name = Transliterator.getInstance("[^\\ -\\u007F] name");
187        //if (hex == null) hex = Transliterator.getInstance("[^\\ -\\u007F] hex");
188        return "[" + (SHOW_NAMES ? hex(s) + "; " : "") + hex(s) + "]";
189    }
190
191    public void characterTest(String s, int ch, CanonicalIterator it)
192    {
193        int mixedCounter = 0;
194        int lastMixedCounter = -1;
195        boolean gotDecomp = false;
196        boolean gotComp = false;
197        boolean gotSource = false;
198        String decomp = Normalizer.decompose(s, false);
199        String comp = Normalizer.compose(s, false);
200
201        // skip characters that don't have either decomp.
202        // need quick test for this!
203        if (s.equals(decomp) && s.equals(comp)) return;
204
205        it.setSource(s);
206
207        while (true) {
208            String item = it.next();
209            if (item == null) break;
210            if (item.equals(s)) gotSource = true;
211            if (item.equals(decomp)) gotDecomp = true;
212            if (item.equals(comp)) gotComp = true;
213            if ((mixedCounter & 0x7F) == 0 && (ch < 0xAD00 || ch > 0xAC00 + 11172)) {
214                if (lastMixedCounter != mixedCounter) {
215                    logln("");
216                    lastMixedCounter = mixedCounter;
217                }
218                logln("\t" + mixedCounter + "\t" + hex(item)
219                + (item.equals(s) ? "\t(*original*)" : "")
220                + (item.equals(decomp) ? "\t(*decomp*)" : "")
221                + (item.equals(comp) ? "\t(*comp*)" : "")
222                );
223            }
224
225        }
226
227        // check that zeros optimization doesn't mess up.
228        /*
229        if (true) {
230            it.reset();
231            itSet.clear();
232            while (true) {
233                String item = it.next();
234                if (item == null) break;
235                itSet.add(item);
236            }
237            slowIt.setSource(s);
238            slowItSet.clear();
239            while (true) {
240                String item = slowIt.next();
241                if (item == null) break;
242                slowItSet.add(item);
243            }
244            if (!itSet.equals(slowItSet)) {
245                errln("Zero optimization failure with " + getReadable(s));
246            }
247        }
248        */
249
250        mixedCounter++;
251        if (!gotSource || !gotDecomp || !gotComp) {
252            errln("FAIL CanonicalIterator: " + s + " decomp: " +decomp+" comp: "+comp);
253            it.reset();
254            for(String item=it.next();item!=null;item=it.next()){
255                err(item + "    ");
256            }
257            errln("");
258        }
259    }
260
261    static String collectionToString(Collection col) {
262        StringBuffer result = new StringBuffer();
263        Iterator it = col.iterator();
264        while (it.hasNext()) {
265            if (result.length() != 0) result.append(", ");
266            result.append(it.next().toString());
267        }
268        return result.toString();
269    }
270}