/*
 *******************************************************************************
 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.normalizer;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestFmwk;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UProperty;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.ComposedCharIter;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Normalizer;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.StringCharacterIterator;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class TestDeprecatedNormalizerAPI extends TestFmwk
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert{
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static void main(String[] args) throws Exception
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String[] tempArgs = new String[args.length];
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int count = 0;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Allow the test to be pointed at a specific version of the Unicode database
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //for (int i = 0; i < args.length; i++)
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //{
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    if (args[i].equals("-data")) {
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //        tempInfo = new UInfo(args[++i], args[++i]);
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    } else {
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //        tempArgs[count++] = args[i];
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //    }
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //}
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        args = new String[count];
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        System.arraycopy(tempArgs, 0, args, 0, count);
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        new TestDeprecatedNormalizerAPI().run(args);
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public TestDeprecatedNormalizerAPI() {
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestNormalizerAPI(){
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         // instantiate a Normalizer from a CharacterIterator
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s=Utility.unescape("a\u0308\uac00\\U0002f800");
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // make s a bit longer and more interesting
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        java.text.CharacterIterator iter = new StringCharacterIterator(s+s);
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //test deprecated constructors
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer norm = new Normalizer(iter, Normalizer.NFC,0);
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(norm.next()!=0xe4) {
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln("error in Normalizer(CharacterIterator).next()");
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer norm2 = new Normalizer(s,Normalizer.NFC,0);
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(norm2.next()!=0xe4) {
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln("error in Normalizer(CharacterIterator).next()");
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // test clone(), ==, and hashCode()
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer clone=(Normalizer)norm.clone();
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(clone.getBeginIndex()!= norm.getBeginIndex()){
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert           errln("error in Normalizer.getBeginIndex()");
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(clone.getEndIndex()!= norm.getEndIndex()){
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert           errln("error in Normalizer.getEndIndex()");
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // test setOption() and getOption()
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        clone.setOption(0xaa0000, true);
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        clone.setOption(0x20000, false);
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(clone.getOption(0x880000) ==0|| clone.getOption(0x20000)==1) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert           errln("error in Normalizer::setOption() or Normalizer::getOption()");
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //test deprecated normalize method
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer.normalize(s,Normalizer.NFC,0);
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //test deprecated compose method
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer.compose(s,false,0);
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        //test deprecated decompose method
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Normalizer.decompose(s,false,0);
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Run through all of the characters returned by a composed-char iterator
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and make sure that:
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <ul>
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <li>a) They do indeed have decompositions.
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <li>b) The decomposition according to the iterator is the same as
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *          returned by Normalizer.decompose().
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <li>c) All characters <em>not</em> returned by the iterator do not
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *          have decompositions.
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </ul>
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestComposedCharIter() {
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        doTestComposedChars(false);
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void doTestComposedChars(boolean compat) {
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int options = Normalizer.IGNORE_HANGUL;
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ComposedCharIter iter = new ComposedCharIter(compat, options);
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        char lastChar = 0;
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (iter.hasNext()) {
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char ch = iter.next();
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Test all characters between the last one and this one to make
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // sure that they don't have decompositions
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertNoDecomp(lastChar, ch, compat, options);
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastChar = ch;
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Now make sure that the decompositions for this character
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // make sense
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String chString   = new StringBuffer().append(ch).toString();
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String iterDecomp = iter.decomposition();
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String normDecomp = Normalizer.decompose(chString, compat);
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (iterDecomp.equals(chString)) {
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("ERROR: " + hex(ch) + " has identical decomp");
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            else if (!iterDecomp.equals(normDecomp)) {
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("ERROR: Normalizer decomp for " + hex(ch) + " (" + hex(normDecomp) + ")"
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    + " != iter decomp (" + hex(iterDecomp) + ")" );
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertNoDecomp(lastChar, '\uFFFF', compat, options);
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void assertNoDecomp(char start, char limit, boolean compat, int options)
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (char x = ++start; x < limit; x++) {
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String xString   = new StringBuffer().append(x).toString();
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String decomp = Normalizer.decompose(xString, compat);
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!decomp.equals(xString)) {
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("ERROR: " + hex(x) + " has decomposition (" + hex(decomp) + ")"
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    + " but was not returned by iterator");
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestRoundTrip() {
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int options = Normalizer.IGNORE_HANGUL;
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean compat = false;
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ComposedCharIter iter = new ComposedCharIter(false, options);
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (iter.hasNext()) {
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            final char ch = iter.next();
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String chStr = String.valueOf(ch);
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String decomp = iter.decomposition();
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String comp = Normalizer.compose(decomp, compat);
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (UCharacter.hasBinaryProperty(ch, UProperty.FULL_COMPOSITION_EXCLUSION)) {
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                logln("Skipped excluded char " + hex(ch) + " (" + UCharacter.getName(ch) + ")" );
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Avoid disparaged characters
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (decomp.length() == 4) continue;
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!comp.equals(chStr)) {
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("ERROR: Round trip invalid: " + hex(chStr) + " --> " + hex(decomp)
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    + " --> " + hex(comp));
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("  char decomp is '" + decomp + "'");
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
172