17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
3aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert * Copyright (C) 2009-2015, International Business Machines Corporation and
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.text;
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.BufferedReader;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.Reader;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.StringReader;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.ParseException;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Arrays;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.BitSet;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Comparator;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashSet;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.LinkedHashSet;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Locale;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Random;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Set;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.regex.Matcher;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.regex.Pattern;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestFmwk;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestUtil;
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestUtil.JavaVendor;
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UScript;
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.IdentifierInfo;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Normalizer2;
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.SpoofChecker;
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.SpoofChecker.CheckResult;
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.SpoofChecker.RestrictionLevel;
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale;
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class SpoofCheckerTest extends TestFmwk {
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static void main(String[] args) throws Exception {
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        new SpoofCheckerTest().run(args);
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Identifiers for verifying that spoof checking is minimally alive and working.
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    char[] goodLatinChars = { (char) 0x75, (char) 0x7a };
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String goodLatin = new String(goodLatinChars); /* "uz", all ASCII */
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* (not confusable) */
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    char[] scMixedChars = { (char) 0x73, (char) 0x0441 };
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String scMixed = new String(scMixedChars); /* "sc", with Cyrillic 'c' */
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* (mixed script, confusable */
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String scLatin = "sc";   /* "sc", plain ascii. */
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String goodCyrl = "\u0438\u043B";    // "Cyrillic small letter i and el"  Plain lower case Cyrillic letters, no latin confusables
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String goodGreek = "\u03c0\u03c6";   // "Greek small letter pi and phi"  Plain lower case Greek letters
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Various 1 l I look-alikes
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String lll_Latin_a = "lI1";   // small letter l, cap I, digit 1, all ASCII
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //  "\uFF29\u217C\u0196"  Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String lll_Latin_b = "\uff29\u217c\u0196";
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String lll_Cyrl = "\u0406\u04C0\u0031";  // "\u0406\u04C01"
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /* The skeleton transform for all of the 'lll' lookalikes is ascii lower case letter l. */
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String lll_Skel = "lll";
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    String han_Hiragana = "\u3086\u308A \u77F3\u7530";  // Hiragana, space, Han
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Test basic constructor.
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestUSpoof() {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (sc == null) {
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln("FAIL: null SpoofChecker");
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Test build from source rules.
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestOpenFromSourceRules() {
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (TestUtil.getJavaVendor() == JavaVendor.IBM && TestUtil.getJavaVersion() == 5) {
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Note: IBM Java 5 has a bug reading a large UTF-8 text contents
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("Skip this test case because of the IBM Java 5 bug");
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String fileName;
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Reader confusables;
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Reader confusablesWholeScript;
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
93aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            SpoofChecker rsc = null;
94aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fileName = "unicode/confusables.txt";
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            confusables = TestUtil.getDataReader(fileName, "UTF-8");
97aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            try {
98aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                fileName = "unicode/confusablesWholeScript.txt";
99aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                confusablesWholeScript = TestUtil.getDataReader(fileName, "UTF-8");
100aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                try {
101aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    rsc = new SpoofChecker.Builder().setData(confusables, confusablesWholeScript).build();
102aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                } finally {
103aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    confusablesWholeScript.close();
104aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                }
105aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            } finally {
106aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                confusables.close();
107aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            }
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (rsc == null) {
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("FAIL: null SpoofChecker");
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Check that newly built-from-rules SpoofChecker is able to function.
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            checkSkeleton(rsc, "TestOpenFromSourceRules");
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            rsc.failsChecks("Hello", result);
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // The checker we just built from source rules should be equivalent to the
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //  default checker created from prebuilt rules baked into the ICU data.
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker defaultChecker = new SpoofChecker.Builder().build();
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertTrue("Checker built from rules equals default", defaultChecker.equals(rsc));
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker optionChecker = new SpoofChecker.Builder().
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    setRestrictionLevel(RestrictionLevel.UNRESTRICTIVE).build();
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertFalse("", optionChecker.equals(rsc));
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Stub source data to build into a test SpoofChecker
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String stubWSConfusables =
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                "# Stub Whole Script Confusable data\n" +
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                "0561          ; Armn; Cyrl; L #      (ա)  ARMENIAN SMALL LETTER AYB\n";
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String stubConfusables =
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                "# Stub confusables data\n" +
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                "05AD ; 0596 ;  SL  # ( ֭ → ֖ ) HEBREW ACCENT DEHI → HEBREW ACCENT TIPEHA   #\n";
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Verify that re-using a builder doesn't alter SpoofCheckers that were
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //  previously created by that builder. (The builder could modify data
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            //  being used by the existing checker)
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker.Builder builder = new SpoofChecker.Builder();
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker testChecker1 = builder.build();
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertTrue("", testChecker1.equals(defaultChecker));
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            builder.setData(new StringReader(stubConfusables), new StringReader(stubWSConfusables));
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            builder.setRestrictionLevel(RestrictionLevel.UNRESTRICTIVE);
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            builder.setChecks(SpoofChecker.SINGLE_SCRIPT_CONFUSABLE);
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Set<ULocale>allowedLocales = new HashSet<ULocale>();
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            allowedLocales.add(ULocale.JAPANESE);
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            allowedLocales.add(ULocale.FRENCH);
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            builder.setAllowedLocales(allowedLocales);
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker testChecker2 = builder.build();
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker testChecker3 = builder.build();
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertTrue("", testChecker1.equals(defaultChecker));
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertFalse("", testChecker2.equals(defaultChecker));
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertTrue("", testChecker2.equals(testChecker3));
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (java.io.IOException e) {
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln(e.toString());
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (ParseException e) {
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln(e.toString());
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set & Get Check Flags
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestGetSetChecks1() {
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.ALL_CHECKS).build();
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int t;
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        t = sc.getChecks();
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.ALL_CHECKS, t);
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setChecks(0).build();
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        t = sc.getChecks();
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, t);
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int checks = SpoofChecker.WHOLE_SCRIPT_CONFUSABLE | SpoofChecker.MIXED_SCRIPT_CONFUSABLE
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                | SpoofChecker.ANY_CASE;
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setChecks(checks).build();
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        t = sc.getChecks();
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", checks, t);
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * get & setAllowedChars
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestGetSetAllowedChars() {
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet us;
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet uset;
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        uset = sc.getAllowedChars();
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", uset.isFrozen());
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        us = new UnicodeSet((int) 0x41, (int) 0x5A); /* [A-Z] */
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setAllowedChars(us).build();
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", us, sc.getAllowedChars());
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * get & set Checks
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestGetSetChecks() {
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int checks;
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int checks2;
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean checkResults;
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checks = sc.getChecks();
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.ALL_CHECKS, checks);
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checks &= ~(SpoofChecker.SINGLE_SCRIPT | SpoofChecker.MIXED_SCRIPT_CONFUSABLE);
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setChecks(checks).build();
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checks2 = sc.getChecks();
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", checks, checks2);
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * The checks that were disabled just above are the same ones that the "scMixed" test fails. So with those tests
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * gone checking that Identifier should now succeed
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(scMixed);
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", checkResults);
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * AllowedLocales
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestAllowedLocales() {
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Set<ULocale> allowedLocales = null;
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Set<Locale> allowedJavaLocales = null;
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean checkResults;
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Default allowed locales list should be empty */
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedLocales = sc.getAllowedLocales();
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("Empty allowed locales", allowedLocales.isEmpty());
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedJavaLocales = sc.getAllowedJavaLocales();
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("Empty allowed Java locales", allowedJavaLocales.isEmpty());
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ULocale enloc = new ULocale("en");
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ULocale ruloc = new ULocale("ru_RU");
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedLocales = new HashSet<ULocale>();
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedLocales.add(enloc);
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedLocales.add(ruloc);
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setAllowedLocales(allowedLocales).build();
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedLocales = sc.getAllowedLocales();
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("en in allowed locales", allowedLocales.contains(enloc));
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("ru_RU in allowed locales", allowedLocales.contains(ruloc));
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Locale frlocJ = new Locale("fr");
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedJavaLocales = new HashSet<Locale>();
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedJavaLocales.add(frlocJ);
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setAllowedJavaLocales(allowedJavaLocales).build();
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("no en in allowed Java locales", allowedJavaLocales.contains(new Locale("en")));
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("fr in allowed Java locales", allowedJavaLocales.contains(frlocJ));
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Limit checks to SpoofChecker.CHAR_LIMIT. Some of the test data has whole script confusables also, which we
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * don't want to see in this test.
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).setAllowedLocales(allowedLocales).build();
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(goodLatin);
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", checkResults);
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(goodGreek, result);
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.CHAR_LIMIT, result.checks);
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(goodCyrl);
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", checkResults);
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Reset with an empty locale list, which should allow all characters to pass */
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        allowedLocales = new LinkedHashSet<ULocale>();
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CHAR_LIMIT).setAllowedLocales(allowedLocales).build();
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(goodGreek);
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", checkResults);
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * AllowedChars set/get the UnicodeSet of allowed characters.
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestAllowedChars() {
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet set;
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet tmpSet;
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean checkResults;
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* By default, we should see no restriction; the UnicodeSet should allow all characters. */
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        set = sc.getAllowedChars();
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tmpSet = new UnicodeSet(0, 0x10ffff);
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", tmpSet, set);
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Setting the allowed chars should enable the check. */
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setChecks(SpoofChecker.ALL_CHECKS & ~SpoofChecker.CHAR_LIMIT).build();
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tmpSet.remove(goodLatin.charAt(1));
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().setAllowedChars(tmpSet).build();
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Latin Identifier should now fail; other non-latin test cases should still be OK */
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(goodLatin, result);
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", checkResults);
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.CHAR_LIMIT | SpoofChecker.RESTRICTION_LEVEL, result.checks);
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(goodGreek, result);
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", checkResults);
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.WHOLE_SCRIPT_CONFUSABLE, result.checks);
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestCheck() {
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean checkResults;
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.position = 666;
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(goodLatin, result);
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", checkResults);
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, result.position);
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(goodCyrl, result);
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", checkResults);
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.position = 666;
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(scMixed, result);
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", checkResults);
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.MIXED_SCRIPT_CONFUSABLE | SpoofChecker.SINGLE_SCRIPT, result.checks);
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, result.position);
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.position = 666;
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.failsChecks(han_Hiragana, result);
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", checkResults);
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, result.position);
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, result.checks);
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestAreConfusable1() {
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int checkResults;
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.areConfusable(scLatin, scMixed);
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.MIXED_SCRIPT_CONFUSABLE, checkResults);
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.areConfusable(goodGreek, scLatin);
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, checkResults);
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkResults = sc.areConfusable(lll_Latin_a, lll_Latin_b);
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.SINGLE_SCRIPT_CONFUSABLE, checkResults);
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestGetSkeleton() {
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String dest;
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dest = sc.getSkeleton(SpoofChecker.ANY_CASE, lll_Latin_a);
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", lll_Skel, dest);
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * IntlTestSpoof is the top level test class for the Unicode Spoof detection tests
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Test the USpoofDetector API functions that require C++
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // The pure C part of the API, which is most of it, is tested in cintltst
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * IntlTestSpoof tests for USpoofDetector
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestSpoofAPI() {
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s = "xyz";  // Many latin ranges are whole-script confusable with other scripts.
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If this test starts failing, consult confusablesWholeScript.txt
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.position = 666;
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean checkResults = sc.failsChecks(s, result);
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", checkResults);
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, result.position);
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().build();
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s1 = "cxs";
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s2 = Utility.unescape("\\u0441\\u0445\\u0455"); // Cyrillic "cxs"
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int checkResult = sc.areConfusable(s1, s2);
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.MIXED_SCRIPT_CONFUSABLE | SpoofChecker.WHOLE_SCRIPT_CONFUSABLE, checkResult);
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        sc = new SpoofChecker.Builder().build();
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        s = "I1l0O";
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String dest = sc.getSkeleton(SpoofChecker.ANY_CASE, s);
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", dest, "lllOO");
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestSkeleton() {
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, "TestSkeleton");
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // testSkeleton. Spot check a number of confusable skeleton substitutions from the
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Unicode data file confusables.txt
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Test cases chosen for substitutions of various lengths, and
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // membership in different mapping tables.
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void checkSkeleton(SpoofChecker sc, String testName) {
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ML = 0;
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int SL = SpoofChecker.SINGLE_SCRIPT_CONFUSABLE;
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int MA = SpoofChecker.ANY_CASE;
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int SA = SpoofChecker.SINGLE_SCRIPT_CONFUSABLE | SpoofChecker.ANY_CASE;
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
408f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, MA, "\\u02b9identifier'",  "'identifier'",  testName);
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, SL, "nochange", "nochange", testName);
411f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SA, "nochange", "nochange", testName);
412f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, ML, "nochange", "nochange", testName);
413f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, MA, "nochange", "nochange", testName);
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, MA, "love", "love", testName);
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, MA, "1ove", "love", testName);   // Digit 1 to letter l
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, ML, "OOPS", "OOPS", testName);
417f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, ML, "00PS", "OOPS", testName);
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, MA, "OOPS", "OOPS", testName);
419f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, MA, "00PS", "OOPS", testName);   // Digit 0 to letter O
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, SL, "\\u059c", "\\u0301", testName);
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, SL, "\\u2A74", "\\u003A\\u003A\\u003D", testName);
422f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SL, "\\u247E", "(ll)", testName);
423f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u006c\\u0644\\u006f", testName);
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
425f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // 0C83 mapping existed in the ML and MA tables, did not exist in SL, SA (Original Unicode 7)
426f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        //   mapping exists in all tables (ICU 55).
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 0C83 ; 0983 ; ML #  KANNADA SIGN VISARGA to
428f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SL, "\\u0C83", "\\u0983", testName);
429f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SA, "\\u0C83", "\\u0983", testName);
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, ML, "\\u0C83", "\\u0983", testName);
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, MA, "\\u0C83", "\\u0983", testName);
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
433f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // 0391 mappings existed only in MA and SA tables (Original Unicode 7).
434f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        //      mappings exist in all tables (ICU 55)
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, MA, "\\u0391", "A", testName);
436f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SA, "\\u0391", "A", testName);
437f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, ML, "\\u0391", "A", testName);
438f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SL, "\\u0391", "A", testName);
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
440f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // 13CF Mappings in all four tables, different in MA (Original Unicode 7).
441f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        //      Mapping same in all tables (ICU 55)
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, ML, "\\u13CF", "b", testName);
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, MA, "\\u13CF", "b", testName);
444f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SL, "\\u13CF", "b", testName);
445f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        checkSkeleton(sc, SA, "\\u13CF", "b", testName);
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 0022 ; 0027 0027 ;
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // all tables
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, SL, "\"", "\\u0027\\u0027", testName);
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, SA, "\"", "\\u0027\\u0027", testName);
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, ML, "\"", "\\u0027\\u0027", testName);
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        checkSkeleton(sc, MA, "\"", "\\u0027\\u0027", testName);
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Internal function to run a single skeleton test case.
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Run a single confusable skeleton transformation test case.
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void checkSkeleton(SpoofChecker sc, int type, String input, String expected, String testName) {
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String uInput = Utility.unescape(input);
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String uExpected = Utility.unescape(expected);
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String actual;
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        actual = sc.getSkeleton(type, uInput);
465f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        Throwable t = new Throwable();
466f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int lineNumberOfTest = t.getStackTrace()[1].getLineNumber();
467f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
468f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        assertEquals(testName + " test at line " + lineNumberOfTest + " :  Expected (escaped): " + expected, uExpected, actual);
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestAreConfusable() {
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s1 = "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. "
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                + "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. ";
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s2 = "A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. "
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                + "A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. ";
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.SINGLE_SCRIPT_CONFUSABLE, sc.areConfusable(s1, s2));
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestInvisible() {
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker sc = new SpoofChecker.Builder().build();
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s = Utility.unescape("abcd\\u0301ef");
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        SpoofChecker.CheckResult result = new SpoofChecker.CheckResult();
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.position = -42;
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertFalse("", sc.failsChecks(s, result));
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, result.checks);
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", result.position, 0);
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s2 = Utility.unescape("abcd\\u0301\\u0302\\u0301ef");
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", sc.failsChecks(s2, result));
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.INVISIBLE, result.checks);
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, result.position);
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Two acute accents, one from the composed a with acute accent, \u00e1,
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and one separate.
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        result.position = -42;
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String s3 = Utility.unescape("abcd\\u00e1\\u0301xyz");
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", sc.failsChecks(s3, result));
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", SpoofChecker.INVISIBLE, result.checks);
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", 0, result.position);
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestRestrictionLevel() {
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Object[][] tests = {
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"aγ♥", RestrictionLevel.UNRESTRICTIVE},
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"a", RestrictionLevel.ASCII},
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"γ", RestrictionLevel.SINGLE_SCRIPT_RESTRICTIVE},
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"aアー", RestrictionLevel.HIGHLY_RESTRICTIVE},
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"aऄ", RestrictionLevel.MODERATELY_RESTRICTIVE},
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"aγ", RestrictionLevel.MINIMALLY_RESTRICTIVE},
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        IdentifierInfo idInfo = new IdentifierInfo().setIdentifierProfile(SpoofChecker.RECOMMENDED);
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CheckResult checkResult = new CheckResult();
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (Object[] test : tests) {
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String testString = (String) test[0];
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            RestrictionLevel expectedLevel = (RestrictionLevel) test[1];
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            idInfo.setIdentifier(testString);
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("Testing restriction level for '" + testString + "'", expectedLevel, idInfo.getRestrictionLevel());
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (RestrictionLevel levelSetInSpoofChecker : RestrictionLevel.values()) {
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                SpoofChecker sc = new SpoofChecker.Builder()
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                .setChecks(SpoofChecker.RESTRICTION_LEVEL) // only check this
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                .setAllowedChars(SpoofChecker.RECOMMENDED)
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                .setRestrictionLevel(levelSetInSpoofChecker)
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                .build();
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean actualValue = sc.failsChecks(testString, checkResult);
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // we want to fail if the text is (say) MODERATE and the testLevel is ASCII
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean expectedFailure = expectedLevel.compareTo(levelSetInSpoofChecker) > 0 || !SpoofChecker.RECOMMENDED.containsAll(testString);
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean t = assertEquals("Testing spoof restriction level for '" + testString + "', " + levelSetInSpoofChecker, expectedFailure, actualValue);
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (!t) { // debugging
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    actualValue = sc.failsChecks(testString, checkResult);
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // we want to fail if the text is (say) MODERATE and the testLevel is ASCII
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    expectedFailure = expectedLevel.compareTo(levelSetInSpoofChecker) > 0 || !SpoofChecker.RECOMMENDED.containsAll(testString);
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestMixedNumbers() {
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Object[][] tests = {
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"1", "[0]"},
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"१", "[०]"},
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"1१", "[0०]"},
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"١۱", "[٠۰]"},
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        IdentifierInfo idInfo = new IdentifierInfo();
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CheckResult checkResult = new CheckResult();
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (Object[] test : tests) {
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String testString = (String) test[0];
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UnicodeSet expected = new UnicodeSet((String)test[1]);
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            idInfo.setIdentifier(testString);
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("", expected, idInfo.getNumerics());
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker sc = new SpoofChecker.Builder()
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .setChecks(SpoofChecker.MIXED_NUMBERS) // only check this
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .build();
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean actualValue = sc.failsChecks(testString, checkResult);
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("Testing spoof mixed numbers for '" + testString + "', ", expected.size() > 1, actualValue);
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestIdentifierInfo() {
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        contains(BitSet, BitSet)
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BitSet bitset12 = IdentifierInfo.set(new BitSet(), UScript.LATIN, UScript.HANGUL);
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BitSet bitset2 = IdentifierInfo.set(new BitSet(), UScript.HANGUL);
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", IdentifierInfo.contains(bitset12, bitset2));
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", IdentifierInfo.contains(bitset12, bitset12));
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", !IdentifierInfo.contains(bitset2, bitset12));
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertTrue("", IdentifierInfo.BITSET_COMPARATOR.compare(
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                IdentifierInfo.set(new BitSet(), UScript.ARABIC),
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                IdentifierInfo.set(new BitSet(), UScript.LATIN)) < 0);
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      displayAlternates(Collection<BitSet>)
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//      displayScripts(BitSet)
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String scriptString = IdentifierInfo.displayScripts(bitset12);
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", "Hang Latn", scriptString);
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Set<BitSet> alternates = new HashSet(Arrays.asList(bitset12, bitset2));
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String alternatesString = IdentifierInfo.displayAlternates(alternates);
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", "Hang; Hang Latn", alternatesString);
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        parseAlternates(String)
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        parseScripts(String)
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", bitset12, IdentifierInfo.parseScripts(scriptString));
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assertEquals("", alternates, IdentifierInfo.parseAlternates(alternatesString));
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String[][] tests = {
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // String, restriction-level, numerics, scripts, alternates, common-alternates
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"a♥",  "UNRESTRICTIVE", "[]", "Latn", "", ""},
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"a〆",  "HIGHLY_RESTRICTIVE", "[]", "Latn", "Hani Hira Kana", "Hani Hira Kana"},
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"aー〆",  "HIGHLY_RESTRICTIVE", "[]", "Latn", "Hira Kana", "Hira Kana"},
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"aー〆ア",  "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"アaー〆",  "HIGHLY_RESTRICTIVE", "[]", "Latn Kana", "", ""},
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"a1١",  "UNRESTRICTIVE", "[0٠]", "Latn", "Arab Thaa", "Arab Thaa"},
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"a1١۱",  "UNRESTRICTIVE", "[0٠۰]", "Latn Arab", "", ""},
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"١ー〆aア1१۱",  "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                {"aアー〆1१١۱",  "UNRESTRICTIVE", "[0٠۰०]", "Latn Kana Arab", "Deva Kthi Mahj", "Deva Kthi Mahj"},
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (String[] test : tests) {
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String testString = test[0];
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            IdentifierInfo idInfo = new IdentifierInfo();
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            idInfo.setIdentifierProfile(SpoofChecker.RECOMMENDED);
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            idInfo.setIdentifier(testString);
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("Identifier " + testString, testString, idInfo.getIdentifier());
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            RestrictionLevel restrictionLevel = RestrictionLevel.valueOf(test[1]);
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("RestrictionLevel " + testString, restrictionLevel, idInfo.getRestrictionLevel());
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UnicodeSet numerics = new UnicodeSet(test[2]);
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("Numerics " + testString, numerics, idInfo.getNumerics());
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BitSet scripts = IdentifierInfo.parseScripts(test[3]);
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("Scripts " + testString, scripts, idInfo.getScripts());
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Set<BitSet> alternates2 = IdentifierInfo.parseAlternates(test[4]);
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("Alternates " + testString, alternates2, idInfo.getAlternates());
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BitSet commonAlternates = IdentifierInfo.parseScripts(test[5]);
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertEquals("Common Alternates " + testString, commonAlternates, idInfo.getCommonAmongAlternates());
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// TODO
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        getIdentifierProfile()
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//        setIdentifierProfile(UnicodeSet)
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestComparator() {
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Random random = new Random(0);
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < 100; ++i) {
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BitSet[] items = new BitSet[random.nextInt(5)+3];
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int j = 0; j < items.length; ++j) {
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                items[j] = new BitSet();
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int countInBitset = random.nextInt(5);
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int k = 0; k < countInBitset; ++k) {
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    items[j].set(random.nextInt(10));
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            checkComparator(IdentifierInfo.BITSET_COMPARATOR, items);
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Dumb implementation for now
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private <T> void checkComparator(Comparator<T> comparator, T... items) {
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        logln("Checking " + Arrays.asList(items));
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * The relation is transitive: a < b and b < c implies a < c. We test here.
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * The relation is trichotomous: exactly one of a <  b, b < a and a = b is true. Guaranteed by comparator.
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < items.length-2; ++i) {
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            T a = items[i];
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int j = i+1; j < items.length-1; ++j) {
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                T b = items[j];
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int k = j+1; k < items.length; ++k) {
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    T c = items[k];
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    checkTransitivity(comparator, a, b, c);
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    checkTransitivity(comparator, a, c, b);
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    checkTransitivity(comparator, b, a, b);
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    checkTransitivity(comparator, b, c, a);
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    checkTransitivity(comparator, c, a, b);
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    checkTransitivity(comparator, c, b, a);
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private <T> void checkTransitivity(Comparator<T> comparator, T a, T b, T c) {
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ab = comparator.compare(a,b);
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int bc = comparator.compare(b,c);
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ca = comparator.compare(c,a);
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (!assertFalse("Transitive: " + a + ", " + b + ", " + c,
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ab < 0 && bc < 0 && ca <= 0)) {
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for debugging
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            comparator.compare(a,b);
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            comparator.compare(b,c);
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            comparator.compare(c,a);
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assertFalse("Transitive: " + a + ", " + b + ", " + c,
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ab < 0 && bc < 0 && ca <= 0);
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String parseHex(String in) {
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder sb = new StringBuilder();
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (String oneCharAsHexString : in.split("\\s+")) {
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (oneCharAsHexString.length() > 0) {
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                sb.appendCodePoint(Integer.parseInt(oneCharAsHexString, 16));
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return sb.toString();
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String escapeString(String in) {
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder out = new StringBuilder();
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < in.length(); i++) {
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c = in.codePointAt(i);
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (c <= 0x7f) {
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                out.append((char) c);
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (c <= 0xffff) {
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                out.append(String.format("\\u%04x", c));
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                out.append(String.format("\\U%06x", c));
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                i++;
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return out.toString();
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Verify that each item from the Unicode confusables.txt file
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // transforms into the expected skeleton.
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void testConfData() {
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (TestUtil.getJavaVendor() == JavaVendor.IBM && TestUtil.getJavaVersion() == 5) {
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Note: IBM Java 5 has a bug reading a large UTF-8 text contents
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            logln("Skip this test case because of the IBM Java 5 bug");
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Read in the confusables.txt file. (Distributed by Unicode.org)
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String fileName = "unicode/confusables.txt";
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BufferedReader confusablesRdr = TestUtil.getDataReader(fileName, "UTF-8");
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Create a default spoof checker to use in this test.
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            SpoofChecker sc = new SpoofChecker.Builder().build();
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Parse lines from the confusables.txt file. Example Line:
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // FF44 ; 0064 ; SL # ( d -> d ) FULLWIDTH ....
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Lines have three fields. The hex fields can contain more than one character,
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and each character may be more than 4 digits (for supplemntals)
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This regular expression matches lines and splits the fields into capture groups.
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Capture group 1: map from chars
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // 2: map to chars
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // 3: table type, SL, ML, SA or MA
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // 4: Comment Lines Only
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // 5: Error Lines Only
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Matcher parseLine = Pattern.compile(
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "\\ufeff?" + "(?:([0-9A-F\\s]+);([0-9A-F\\s]+);\\s*(SL|ML|SA|MA)\\s*(?:#.*?)?$)"
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            + "|\\ufeff?(\\s*(?:#.*)?)"). // Comment line
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            matcher("");
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Normalizer2 normalizer = Normalizer2.getNFDInstance();
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int lineNum = 0;
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String inputLine;
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while ((inputLine = confusablesRdr.readLine()) != null) {
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lineNum++;
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                parseLine.reset(inputLine);
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (!parseLine.matches()) {
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    errln("Syntax error in confusable data file at line " + lineNum);
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    errln(inputLine);
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (parseLine.group(4) != null) {
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue; // comment line
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String from = parseHex(parseLine.group(1));
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (!normalizer.isNormalized(from)) {
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // The source character was not NFD.
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Skip this case; the first step in obtaining a skeleton is to NFD the input,
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // so the mapping in this line of confusables.txt will never be applied.
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String rawExpected = parseHex(parseLine.group(2));
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String expected = normalizer.normalize(rawExpected);
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int skeletonType = 0;
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String tableType = parseLine.group(3);
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (tableType.equals("SL")) {
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    skeletonType = SpoofChecker.SINGLE_SCRIPT_CONFUSABLE;
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if (tableType.indexOf("SA") >= 0) {
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    skeletonType = SpoofChecker.SINGLE_SCRIPT_CONFUSABLE | SpoofChecker.ANY_CASE;
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if (tableType.indexOf("ML") >= 0) {
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    skeletonType = 0;
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if (tableType.indexOf("MA") >= 0) {
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    skeletonType = SpoofChecker.ANY_CASE;
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String actual;
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                actual = sc.getSkeleton(skeletonType, from);
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (!actual.equals(expected)) {
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    errln("confusables.txt: " + lineNum + ": " + parseLine.group(0));
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    errln("Actual: " + escapeString(actual));
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            confusablesRdr.close();
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (IOException e) {
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln(e.toString());
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
788