1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3 *******************************************************************************
4 * Copyright (C) 1996-2012, International Business Machines Corporation and    *
5 * others. All Rights Reserved.                                                *
6 *******************************************************************************
7 */
8package android.icu.dev.test.translit;
9
10import java.util.ArrayList;
11import java.util.Enumeration;
12import java.util.HashMap;
13import java.util.HashSet;
14import java.util.Iterator;
15import java.util.List;
16import java.util.Locale;
17import java.util.Map.Entry;
18
19import android.icu.dev.test.TestFmwk;
20import android.icu.dev.test.TestUtil;
21import android.icu.dev.util.UnicodeMap;
22import android.icu.impl.Utility;
23import android.icu.impl.UtilityExtensions;
24import android.icu.lang.CharSequences;
25import android.icu.lang.UCharacter;
26import android.icu.lang.UScript;
27import android.icu.text.CanonicalIterator;
28import android.icu.text.Normalizer2;
29import android.icu.text.Replaceable;
30import android.icu.text.ReplaceableString;
31import android.icu.text.StringTransform;
32import android.icu.text.Transliterator;
33import android.icu.text.UTF16;
34import android.icu.text.UnicodeFilter;
35import android.icu.text.UnicodeSet;
36import android.icu.text.UnicodeSetIterator;
37import android.icu.util.CaseInsensitiveString;
38import android.icu.util.ULocale;
39import org.junit.runner.RunWith;
40import android.icu.junit.IcuTestFmwkRunner;
41
42/***********************************************************************
43
44                     HOW TO USE THIS TEST FILE
45                               -or-
46                  How I developed on two platforms
47                without losing (too much of) my mind
48
49
501. Add new tests by copying/pasting/changing existing tests.  On Java,
51   any public void method named Test...() taking no parameters becomes
52   a test.  On C++, you need to modify the header and add a line to
53   the runIndexedTest() dispatch method.
54
552. Make liberal use of the expect() method; it is your friend.
56
573. The tests in this file exactly match those in a sister file on the
58   other side.  The two files are:
59
60   icu4j:  src/android.icu.dev.test/translit/TransliteratorTest.java
61   icu4c:  source/test/intltest/transtst.cpp
62
63                  ==> THIS IS THE IMPORTANT PART <==
64
65   When you add a test in this file, add it in transtst.cpp too.
66   Give it the same name and put it in the same relative place.  This
67   makes maintenance a lot simpler for any poor soul who ends up
68   trying to synchronize the tests between icu4j and icu4c.
69
704. If you MUST enter a test that is NOT paralleled in the sister file,
71   then add it in the special non-mirrored section.  These are
72   labeled
73
74     "icu4j ONLY"
75
76   or
77
78     "icu4c ONLY"
79
80   Make sure you document the reason the test is here and not there.
81
82
83Thank you.
84The Management
85 ***********************************************************************/
86
87/**
88 * @test
89 * @summary General test of Transliterator
90 */
91@RunWith(IcuTestFmwkRunner.class)
92public class TransliteratorTest extends TestFmwk {
93
94    public static void main(String[] args) throws Exception {
95        new TransliteratorTest().run(args);
96    }
97
98    public void TestHangul() {
99
100        Transliterator lh = Transliterator.getInstance("Latin-Hangul");
101        Transliterator hl = lh.getInverse();
102
103        assertTransform("Transform", "\uCE20", lh, "ch");
104
105        assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta");
106        assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa");
107        assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja");
108        assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka");
109        assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa");
110        assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha");
111        assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa");
112        assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja");
113        assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha");
114        assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga");
115        assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma");
116        assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba");
117        assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa");
118        assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta");
119        assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa");
120        assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha");
121        assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa");
122        assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga");
123        assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa");
124        assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja");
125        assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa");
126        assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta");
127        assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa");
128        assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa");
129        assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka");
130        assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa");
131
132    }
133
134    public void TestChinese() {
135        Transliterator hanLatin = Transliterator.getInstance("Han-Latin");
136        assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode");
137        assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D");
138    }
139
140    public void TestRegistry() {
141        checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound
142        checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound
143        checkRegistry("foo1", "[:letter:] a > b;");
144        for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
145            String id = (String) e.nextElement();
146            checkRegistry(id);
147        }
148    }
149
150    private void checkRegistry (String id, String rules) {
151        Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
152        Transliterator.registerInstance(foo);
153        checkRegistry(id);
154    }
155
156    private void checkRegistry(String id) {
157        Transliterator fie = Transliterator.getInstance(id);
158        final UnicodeSet fae = new UnicodeSet("[a-z5]");
159        fie.setFilter(fae);
160        Transliterator foe = Transliterator.getInstance(id);
161        UnicodeFilter fee = foe.getFilter();
162        if (fae.equals(fee)) {
163            errln("Changed what is in registry for " + id);
164        }
165    }
166
167    public void TestInstantiation() {
168        long ms = System.currentTimeMillis();
169        String ID;
170        for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
171            ID = (String) e.nextElement();
172            if (ID.equals("Latin-Han/definition")) {
173                System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later");
174                continue;
175            }
176            Transliterator t = null;
177            try {
178                t = Transliterator.getInstance(ID);
179                // This is only true for some subclasses
180                //                // We should get a new instance if we try again
181                //                Transliterator t2 = Transliterator.getInstance(ID);
182                //                if (t != t2) {
183                //                    logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t);
184                //                } else {
185                //                    errln("FAIL: " + ID + " returned identical instances");
186                //                    t = null;
187                //                }
188            } catch (IllegalArgumentException ex) {
189                errln("FAIL: " + ID);
190                throw ex;
191            }
192
193            //            if (t.getFilter() != null) {
194            //                errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter());
195            //            }
196
197            if (t != null) {
198                // Now test toRules
199                String rules = null;
200                try {
201                    rules = t.toRules(true);
202
203                    Transliterator.createFromRules("x", rules, Transliterator.FORWARD);
204                } catch (IllegalArgumentException ex2) {
205                    errln("FAIL: " + ID + ".toRules() => bad rules: " +
206                            rules);
207                    throw ex2;
208                }
209            }
210        }
211
212        // Now test the failure path
213        try {
214            ID = "<Not a valid Transliterator ID>";
215            Transliterator t = Transliterator.getInstance(ID);
216            errln("FAIL: " + ID + " returned " + t);
217        } catch (IllegalArgumentException ex) {
218            logln("OK: Bogus ID handled properly");
219        }
220
221        ms = System.currentTimeMillis() - ms;
222        logln("Elapsed time: " + ms + " ms");
223    }
224
225    public void TestSimpleRules() {
226        /* Example: rules 1. ab>x|y
227         *                2. yc>z
228         *
229         * []|eabcd  start - no match, copy e to tranlated buffer
230         * [e]|abcd  match rule 1 - copy output & adjust cursor
231         * [ex|y]cd  match rule 2 - copy output & adjust cursor
232         * [exz]|d   no match, copy d to transliterated buffer
233         * [exzd]|   done
234         */
235        expect("ab>x|y;" +
236                "yc>z",
237                "eabcd", "exzd");
238
239        /* Another set of rules:
240         *    1. ab>x|yzacw
241         *    2. za>q
242         *    3. qc>r
243         *    4. cw>n
244         *
245         * []|ab       Rule 1
246         * [x|yzacw]   No match
247         * [xy|zacw]   Rule 2
248         * [xyq|cw]    Rule 4
249         * [xyqn]|     Done
250         */
251        expect("ab>x|yzacw;" +
252                "za>q;" +
253                "qc>r;" +
254                "cw>n",
255                "ab", "xyqn");
256
257        /* Test categories
258         */
259        Transliterator t = Transliterator.createFromRules("<ID>",
260                "$dummy=\uE100;" +
261                "$vowel=[aeiouAEIOU];" +
262                "$lu=[:Lu:];" +
263                "$vowel } $lu > '!';" +
264                "$vowel > '&';" +
265                "'!' { $lu > '^';" +
266                "$lu > '*';" +
267                "a>ERROR",
268                Transliterator.FORWARD);
269        expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
270    }
271
272    /**
273     * Test inline set syntax and set variable syntax.
274     */
275    public void TestInlineSet() {
276        expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
277        expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
278
279        expect("$digit = [0-9];" +
280                "$alpha = [a-zA-Z];" +
281                "$alphanumeric = [$digit $alpha];" + // ***
282                "$special = [^$alphanumeric];" +     // ***
283                "$alphanumeric > '-';" +
284                "$special > '*';",
285
286                "thx-1138", "---*----");
287    }
288
289    /**
290     * Create some inverses and confirm that they work.  We have to be
291     * careful how we do this, since the inverses will not be true
292     * inverses -- we can't throw any random string at the composition
293     * of the transliterators and expect the identity function.  F x
294     * F' != I.  However, if we are careful about the input, we will
295     * get the expected results.
296     */
297    public void TestRuleBasedInverse() {
298        String RULES =
299            "abc>zyx;" +
300            "ab>yz;" +
301            "bc>zx;" +
302            "ca>xy;" +
303            "a>x;" +
304            "b>y;" +
305            "c>z;" +
306
307            "abc<zyx;" +
308            "ab<yz;" +
309            "bc<zx;" +
310            "ca<xy;" +
311            "a<x;" +
312            "b<y;" +
313            "c<z;" +
314
315            "";
316
317        String[] DATA = {
318                // Careful here -- random strings will not work.  If we keep
319                // the left side to the domain and the right side to the range
320                // we will be okay though (left, abc; right xyz).
321                "a", "x",
322                "abcacab", "zyxxxyy",
323                "caccb", "xyzzy",
324        };
325
326        Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
327        Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE);
328        for (int i=0; i<DATA.length; i+=2) {
329            expect(fwd, DATA[i], DATA[i+1]);
330            expect(rev, DATA[i+1], DATA[i]);
331        }
332    }
333
334    /**
335     * Basic test of keyboard.
336     */
337    public void TestKeyboard() {
338        Transliterator t = Transliterator.createFromRules("<ID>",
339                "psch>Y;"
340                +"ps>y;"
341                +"ch>x;"
342                +"a>A;", Transliterator.FORWARD);
343        String DATA[] = {
344                // insertion, buffer
345                "a", "A",
346                "p", "Ap",
347                "s", "Aps",
348                "c", "Apsc",
349                "a", "AycA",
350                "psch", "AycAY",
351                null, "AycAY", // null means finishKeyboardTransliteration
352        };
353
354        keyboardAux(t, DATA);
355    }
356
357    /**
358     * Basic test of keyboard with cursor.
359     */
360    public void TestKeyboard2() {
361        Transliterator t = Transliterator.createFromRules("<ID>",
362                "ych>Y;"
363                +"ps>|y;"
364                +"ch>x;"
365                +"a>A;", Transliterator.FORWARD);
366        String DATA[] = {
367                // insertion, buffer
368                "a", "A",
369                "p", "Ap",
370                "s", "Aps", // modified for rollback - "Ay",
371                "c", "Apsc", // modified for rollback - "Ayc",
372                "a", "AycA",
373                "p", "AycAp",
374                "s", "AycAps", // modified for rollback - "AycAy",
375                "c", "AycApsc", // modified for rollback - "AycAyc",
376                "h", "AycAY",
377                null, "AycAY", // null means finishKeyboardTransliteration
378        };
379
380        keyboardAux(t, DATA);
381    }
382
383    /**
384     * Test keyboard transliteration with back-replacement.
385     */
386    public void TestKeyboard3() {
387        // We want th>z but t>y.  Furthermore, during keyboard
388        // transliteration we want t>y then yh>z if t, then h are
389        // typed.
390        String RULES =
391            "t>|y;" +
392            "yh>z;" +
393            "";
394
395        String[] DATA = {
396                // Column 1: characters to add to buffer (as if typed)
397                // Column 2: expected appearance of buffer after
398                //           keyboard xliteration.
399                "a", "a",
400                "b", "ab",
401                "t", "abt", // modified for rollback - "aby",
402                "c", "abyc",
403                "t", "abyct", // modified for rollback - "abycy",
404                "h", "abycz",
405                null, "abycz", // null means finishKeyboardTransliteration
406        };
407
408        Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD);
409        keyboardAux(t, DATA);
410    }
411
412    private void keyboardAux(Transliterator t, String[] DATA) {
413        Transliterator.Position index = new Transliterator.Position();
414        ReplaceableString s = new ReplaceableString();
415        for (int i=0; i<DATA.length; i+=2) {
416            StringBuffer log;
417            if (DATA[i] != null) {
418                log = new StringBuffer(s.toString() + " + "
419                        + DATA[i]
420                               + " -> ");
421                t.transliterate(s, index, DATA[i]);
422            } else {
423                log = new StringBuffer(s.toString() + " => ");
424                t.finishTransliteration(s, index);
425            }
426            UtilityExtensions.formatInput(log, s, index);
427            if (s.toString().equals(DATA[i+1])) {
428                logln(log.toString());
429            } else {
430                errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
431            }
432        }
433    }
434
435    // Latin-Arabic has been temporarily removed until it can be
436    // done correctly.
437
438    //  public void TestArabic() {
439    //      String DATA[] = {
440    //          "Arabic",
441    //              "\u062a\u062a\u0645\u062a\u0639 "+
442    //              "\u0627\u0644\u0644\u063a\u0629 "+
443    //              "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
444    //              "\u0628\u0628\u0646\u0638\u0645 "+
445    //              "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
446    //              "\u062c\u0645\u064a\u0644\u0629"
447    //      };
448
449    //      Transliterator t = Transliterator.getInstance("Latin-Arabic");
450    //      for (int i=0; i<DATA.length; i+=2) {
451    //          expect(t, DATA[i], DATA[i+1]);
452    //      }
453    //  }
454
455    /**
456     * Compose the Kana transliterator forward and reverse and try
457     * some strings that should come out unchanged.
458     */
459    public void TestCompoundKana() {
460        Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin");
461        expect(t, "aaaaa", "aaaaa");
462    }
463
464    /**
465     * Compose the hex transliterators forward and reverse.
466     */
467    public void TestCompoundHex() {
468        Transliterator a = Transliterator.getInstance("Any-Hex");
469        Transliterator b = Transliterator.getInstance("Hex-Any");
470        // Transliterator[] trans = { a, b };
471        // Transliterator ab = Transliterator.getInstance(trans);
472        Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any");
473
474        // Do some basic tests of b
475        expect(b, "\\u0030\\u0031", "01");
476
477        String s = "abcde";
478        expect(ab, s, s);
479
480        // trans = new Transliterator[] { b, a };
481        // Transliterator ba = Transliterator.getInstance(trans);
482        Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex");
483        ReplaceableString str = new ReplaceableString(s);
484        a.transliterate(str);
485        expect(ba, str.toString(), str.toString());
486    }
487
488    /**
489     * Do some basic tests of filtering.
490     */
491    public void TestFiltering() {
492
493        Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD);
494        tempTrans.setFilter(new UnicodeSet("[a]"));
495        String tempResult = tempTrans.transform("xa");
496        assertEquals("context should not be filtered ", "xb", tempResult);
497
498        tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD);
499        tempResult = tempTrans.transform("xa");
500        assertEquals("context should not be filtered ", "xb", tempResult);
501
502        Transliterator hex = Transliterator.getInstance("Any-Hex");
503        hex.setFilter(new UnicodeFilter() {
504            public boolean contains(int c) {
505                return c != 'c';
506            }
507            public String toPattern(boolean escapeUnprintable) {
508                return "";
509            }
510            public boolean matchesIndexValue(int v) {
511                return false;
512            }
513            public void addMatchSetTo(UnicodeSet toUnionTo) {}
514        });
515        String s = "abcde";
516        String out = hex.transliterate(s);
517        String exp = "\\u0061\\u0062c\\u0064\\u0065";
518        if (out.equals(exp)) {
519            logln("Ok:   \"" + exp + "\"");
520        } else {
521            logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
522        }
523    }
524
525    /**
526     * Test anchors
527     */
528    public void TestAnchors() {
529        expect("^ab  > 01 ;" +
530                " ab  > |8 ;" +
531                "  b  > k ;" +
532                " 8x$ > 45 ;" +
533                " 8x  > 77 ;",
534
535                "ababbabxabx",
536        "018k7745");
537        expect("$s = [z$] ;" +
538                "$s{ab    > 01 ;" +
539                "   ab    > |8 ;" +
540                "    b    > k ;" +
541                "   8x}$s > 45 ;" +
542                "   8x    > 77 ;",
543
544                "abzababbabxzabxabx",
545        "01z018k45z01x45");
546    }
547
548    /**
549     * Test pattern quoting and escape mechanisms.
550     */
551    public void TestPatternQuoting() {
552        // Array of 3n items
553        // Each item is <rules>, <input>, <expected output>
554        String[] DATA = {
555                "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
556        };
557
558        for (int i=0; i<DATA.length; i+=3) {
559            logln("Pattern: " + Utility.escape(DATA[i]));
560            Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
561            expect(t, DATA[i+1], DATA[i+2]);
562        }
563    }
564
565    public void TestVariableNames() {
566        Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD);
567        if (gl == null) {
568            errln("FAIL: null Transliterator returned.");
569        }
570    }
571
572    /**
573     * Regression test for bugs found in Greek transliteration.
574     */
575    public void TestJ277() {
576        Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
577
578        char sigma = (char)0x3C3;
579        char upsilon = (char)0x3C5;
580        char nu = (char)0x3BD;
581        // not used char PHI = (char)0x3A6;
582        char alpha = (char)0x3B1;
583        // not used char omega = (char)0x3C9;
584        // not used char omicron = (char)0x3BF;
585        // not used char epsilon = (char)0x3B5;
586
587        // sigma upsilon nu -> syn
588        StringBuffer buf = new StringBuffer();
589        buf.append(sigma).append(upsilon).append(nu);
590        String syn = buf.toString();
591        expect(gl, syn, "syn");
592
593        // sigma alpha upsilon nu -> saun
594        buf.setLength(0);
595        buf.append(sigma).append(alpha).append(upsilon).append(nu);
596        String sayn = buf.toString();
597        expect(gl, sayn, "saun");
598
599        // Again, using a smaller rule set
600        String rules =
601            "$alpha   = \u03B1;" +
602            "$nu      = \u03BD;" +
603            "$sigma   = \u03C3;" +
604            "$ypsilon = \u03C5;" +
605            "$vowel   = [aeiouAEIOU$alpha$ypsilon];" +
606            "s <>           $sigma;" +
607            "a <>           $alpha;" +
608            "u <>  $vowel { $ypsilon;" +
609            "y <>           $ypsilon;" +
610            "n <>           $nu;";
611        Transliterator mini = Transliterator.createFromRules
612        ("mini", rules, Transliterator.REVERSE);
613        expect(mini, syn, "syn");
614        expect(mini, sayn, "saun");
615
616        //|    // Transliterate the Greek locale data
617        //|    Locale el("el");
618        //|    DateFormatSymbols syms(el, status);
619        //|    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
620        //|    int32_t i, count;
621        //|    const UnicodeString* data = syms.getMonths(count);
622        //|    for (i=0; i<count; ++i) {
623        //|        if (data[i].length() == 0) {
624        //|            continue;
625        //|        }
626        //|        UnicodeString out(data[i]);
627        //|        gl->transliterate(out);
628        //|        bool_t ok = TRUE;
629        //|        if (data[i].length() >= 2 && out.length() >= 2 &&
630        //|            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
631        //|            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
632        //|                ok = FALSE;
633        //|            }
634        //|        }
635        //|        if (ok) {
636        //|            logln(prettify(data[i] + " -> " + out));
637        //|        } else {
638        //|            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
639        //|        }
640        //|    }
641    }
642
643    //    /**
644    //     * Prefix, suffix support in hex transliterators
645    //     */
646    //    public void TestJ243() {
647    //        // Test default Hex-Any, which should handle
648    //        // \\u, \\U, u+, and U+
649    //        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
650    //        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
651    //
652    //        // Try a custom Hex-Any
653    //        // \\uXXXX and &#xXXXX;
654    //        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
655    //        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",
656    //               "abcd5fx012&#x00033;");
657    //
658    //        // Try custom Any-Hex (default is tested elsewhere)
659    //        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
660    //        expect(hex3, "012", "&#x30;&#x31;&#x32;");
661    //    }
662
663    public void TestJ329() {
664
665        Object[] DATA = {
666                Boolean.FALSE, "a > b; c > d",
667                Boolean.TRUE,  "a > b; no operator; c > d",
668        };
669
670        for (int i=0; i<DATA.length; i+=2) {
671            String err = null;
672            try {
673                Transliterator.createFromRules("<ID>",
674                        (String) DATA[i+1],
675                        Transliterator.FORWARD);
676            } catch (IllegalArgumentException e) {
677                err = e.getMessage();
678            }
679            boolean gotError = (err != null);
680            String desc = (String) DATA[i+1] +
681            (gotError ? (" -> error: " + err) : " -> no error");
682            if ((err != null) == ((Boolean)DATA[i]).booleanValue()) {
683                logln("Ok:   " + desc);
684            } else {
685                errln("FAIL: " + desc);
686            }
687        }
688    }
689
690    /**
691     * Test segments and segment references.
692     */
693    public void TestSegments() {
694        // Array of 3n items
695        // Each item is <rules>, <input>, <expected output>
696        String[] DATA = {
697                "([a-z]) '.' ([0-9]) > $2 '-' $1",
698                "abc.123.xyz.456",
699                "ab1-c23.xy4-z56",
700        };
701
702        for (int i=0; i<DATA.length; i+=3) {
703            logln("Pattern: " + Utility.escape(DATA[i]));
704            Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
705            expect(t, DATA[i+1], DATA[i+2]);
706        }
707    }
708
709    /**
710     * Test cursor positioning outside of the key
711     */
712    public void TestCursorOffset() {
713        // Array of 3n items
714        // Each item is <rules>, <input>, <expected output>
715        String[] DATA = {
716                "pre {alpha} post > | @ ALPHA ;" +
717                "eALPHA > beta ;" +
718                "pre {beta} post > BETA @@ | ;" +
719                "post > xyz",
720
721                "prealphapost prebetapost",
722                "prbetaxyz preBETApost",
723        };
724
725        for (int i=0; i<DATA.length; i+=3) {
726            logln("Pattern: " + Utility.escape(DATA[i]));
727            Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
728            expect(t, DATA[i+1], DATA[i+2]);
729        }
730    }
731
732    /**
733     * Test zero length and > 1 char length variable values.  Test
734     * use of variable refs in UnicodeSets.
735     */
736    public void TestArbitraryVariableValues() {
737        // Array of 3n items
738        // Each item is <rules>, <input>, <expected output>
739        String[] DATA = {
740                "$abe = ab;" +
741                "$pat = x[yY]z;" +
742                "$ll  = 'a-z';" +
743                "$llZ = [$ll];" +
744                "$llY = [$ll$pat];" +
745                "$emp = ;" +
746
747                "$abe > ABE;" +
748                "$pat > END;" +
749                "$llZ > 1;" +
750                "$llY > 2;" +
751                "7$emp 8 > 9;" +
752                "",
753
754                "ab xYzxyz stY78",
755                "ABE ENDEND 1129",
756        };
757
758        for (int i=0; i<DATA.length; i+=3) {
759            logln("Pattern: " + Utility.escape(DATA[i]));
760            Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD);
761            expect(t, DATA[i+1], DATA[i+2]);
762        }
763    }
764
765    /**
766     * Confirm that the contextStart, contextLimit, start, and limit
767     * behave correctly.
768     */
769    public void TestPositionHandling() {
770        // Array of 3n items
771        // Each item is <rules>, <input>, <expected output>
772        String[] DATA = {
773                "a{t} > SS ; {t}b > UU ; {t} > TT ;",
774                "xtat txtb", // pos 0,9,0,9
775                "xTTaSS TTxUUb",
776
777                "a{t} > SS ; {t}b > UU ; {t} > TT ;",
778                "xtat txtb", // pos 2,9,3,8
779                "xtaSS TTxUUb",
780
781                "a{t} > SS ; {t}b > UU ; {t} > TT ;",
782                "xtat txtb", // pos 3,8,3,8
783                "xtaTT TTxTTb",
784        };
785
786        // Array of 4n positions -- these go with the DATA array
787        // They are: contextStart, contextLimit, start, limit
788        int[] POS = {
789                0, 9, 0, 9,
790                2, 9, 3, 8,
791                3, 8, 3, 8,
792        };
793
794        int n = DATA.length/3;
795        for (int i=0; i<n; i++) {
796            Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD);
797            Transliterator.Position pos = new Transliterator.Position(
798                    POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]);
799            ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]);
800            t.transliterate(rsource, pos);
801            t.finishTransliteration(rsource, pos);
802            String result = rsource.toString();
803            String exp = DATA[3*i+2];
804            expectAux(Utility.escape(DATA[3*i]),
805                    DATA[3*i+1],
806                    result,
807                    result.equals(exp),
808                    exp);
809        }
810    }
811
812    /**
813     * Test the Hiragana-Katakana transliterator.
814     */
815    public void TestHiraganaKatakana() {
816        Transliterator hk = Transliterator.getInstance("Hiragana-Katakana");
817        Transliterator kh = Transliterator.getInstance("Katakana-Hiragana");
818
819        // Array of 3n items
820        // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
821        String[] DATA = {
822                "both",
823                "\u3042\u3090\u3099\u3092\u3050",
824                "\u30A2\u30F8\u30F2\u30B0",
825
826                "kh",
827                "\u307C\u3051\u3060\u3042\u3093\u30FC",
828                "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC",
829        };
830
831        for (int i=0; i<DATA.length; i+=3) {
832            switch (DATA[i].charAt(0)) {
833            case 'h': // Hiragana-Katakana
834                expect(hk, DATA[i+1], DATA[i+2]);
835                break;
836            case 'k': // Katakana-Hiragana
837                expect(kh, DATA[i+2], DATA[i+1]);
838                break;
839            case 'b': // both
840                expect(hk, DATA[i+1], DATA[i+2]);
841                expect(kh, DATA[i+2], DATA[i+1]);
842                break;
843            }
844        }
845
846    }
847
    /**
     * Placeholder with an intentionally empty body: per the note below, the
     * test of this name exercises a C++ copy constructor, so there is nothing
     * to run in the Java version.
     */
    public void TestCopyJ476() {
        // This is a C++-only copy constructor test
    }
851
852    /**
853     * Test inter-Indic transliterators.  These are composed.
854     */
855    public void TestInterIndic() {
856        String ID = "Devanagari-Gujarati";
857        Transliterator dg = Transliterator.getInstance(ID);
858        if (dg == null) {
859            errln("FAIL: getInstance(" + ID + ") returned null");
860            return;
861        }
862        String id = dg.getID();
863        if (!id.equals(ID)) {
864            errln("FAIL: getInstance(" + ID + ").getID() => " + id);
865        }
866        String dev = "\u0901\u090B\u0925";
867        String guj = "\u0A81\u0A8B\u0AA5";
868        expect(dg, dev, guj);
869    }
870
871    /**
872     * Test filter syntax in IDs. (J23)
873     */
874    public void TestFilterIDs() {
875        String[] DATA = {
876                "[aeiou]Any-Hex", // ID
877                "[aeiou]Hex-Any", // expected inverse ID
878                "quizzical",      // src
879                "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
880
881                "[aeiou]Any-Hex;[^5]Hex-Any",
882                "[^5]Any-Hex;[aeiou]Hex-Any",
883                "quizzical",
884                "q\\u0075izzical",
885
886                "[abc]Null",
887                "[abc]Null",
888                "xyz",
889                "xyz",
890        };
891
892        for (int i=0; i<DATA.length; i+=4) {
893            String ID = DATA[i];
894            Transliterator t = Transliterator.getInstance(ID);
895            expect(t, DATA[i+2], DATA[i+3]);
896
897            // Check the ID
898            if (!ID.equals(t.getID())) {
899                errln("FAIL: getInstance(" + ID + ").getID() => " +
900                        t.getID());
901            }
902
903            // Check the inverse
904            String uID = DATA[i+1];
905            Transliterator u = t.getInverse();
906            if (u == null) {
907                errln("FAIL: " + ID + ".getInverse() returned NULL");
908            } else if (!u.getID().equals(uID)) {
909                errln("FAIL: " + ID + ".getInverse().getID() => " +
910                        u.getID() + ", expected " + uID);
911            }
912        }
913    }
914
915    /**
916     * Test the case mapping transliterators.
917     */
918    public void TestCaseMap() {
919        Transliterator toUpper =
920            Transliterator.getInstance("Any-Upper[^xyzXYZ]");
921        Transliterator toLower =
922            Transliterator.getInstance("Any-Lower[^xyzXYZ]");
923        Transliterator toTitle =
924            Transliterator.getInstance("Any-Title[^xyzXYZ]");
925
926        expect(toUpper, "The quick brown fox jumped over the lazy dogs.",
927        "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
928        expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
929        "the quick brown foX jumped over the lazY dogs.");
930        expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.",
931        "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
932    }
933
934    /**
935     * Test the name mapping transliterators.
936     */
937    public void TestNameMap() {
938        Transliterator uni2name =
939            Transliterator.getInstance("Any-Name[^abc]");
940        Transliterator name2uni =
941            Transliterator.getInstance("Name-Any");
942
943        expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
944        "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
945        expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
946        "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
947
948        // round trip
949        Transliterator t = Transliterator.getInstance("Any-Name;Name-Any");
950
951        String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
952        expect(t, s, s);
953    }
954
955    /**
956     * Test liberalized ID syntax.  1006c
957     */
958    public void TestLiberalizedID() {
959        // Some test cases have an expected getID() value of NULL.  This
960        // means I have disabled the test case for now.  This stuff is
961        // still under development, and I haven't decided whether to make
962        // getID() return canonical case yet.  It will all get rewritten
963        // with the move to Source-Target/Variant IDs anyway. [aliu]
964        String DATA[] = {
965                "latin-greek", null /*"Latin-Greek"*/, "case insensitivity",
966                "  Null  ", "Null", "whitespace",
967                " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
968                "  null  ; latin-greek  ", null /*"Null;Latin-Greek"*/, "compound whitespace",
969        };
970
971        for (int i=0; i<DATA.length; i+=3) {
972            try {
973                Transliterator t = Transliterator.getInstance(DATA[i]);
974                if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) {
975                    logln("Ok: " + DATA[i+2] +
976                            " create ID \"" + DATA[i] + "\" => \"" +
977                            t.getID() + "\"");
978                } else {
979                    errln("FAIL: " + DATA[i+2] +
980                            " create ID \"" + DATA[i] + "\" => \"" +
981                            t.getID() + "\", exp \"" + DATA[i+1] + "\"");
982                }
983            } catch (IllegalArgumentException e) {
984                errln("FAIL: " + DATA[i+2] +
985                        " create ID \"" + DATA[i] + "\"");
986            }
987        }
988    }
989
990    public void TestCreateInstance() {
991        String FORWARD = "F";
992        String REVERSE = "R";
993        String DATA[] = {
994                // Column 1: id
995                // Column 2: direction
996                // Column 3: expected ID, or "" if expect failure
997                "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
998
999                // JB#2689: bad compound causes crash
1000                "InvalidSource-InvalidTarget", FORWARD, "",
1001                "InvalidSource-InvalidTarget", REVERSE, "",
1002                "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1003                "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1004                "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1005                "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1006
1007                null
1008        };
1009
1010        for (int i=0; DATA[i]!=null; i+=3) {
1011            String id=DATA[i];
1012            int dir = (DATA[i+1]==FORWARD)?
1013                    Transliterator.FORWARD:Transliterator.REVERSE;
1014            String expID=DATA[i+2];
1015            Exception e = null;
1016            Transliterator t;
1017            try {
1018                t = Transliterator.getInstance(id,dir);
1019            } catch (Exception e1) {
1020                e = e1;
1021                t = null;
1022            }
1023            String newID = (t!=null)?t.getID():"";
1024            boolean ok = (newID.equals(expID));
1025            if (t==null) {
1026                newID = e.getMessage();
1027            }
1028            if (ok) {
1029                logln("Ok: createInstance(" +
1030                        id + "," + DATA[i+1] + ") => " + newID);
1031            } else {
1032                errln("FAIL: createInstance(" +
1033                        id + "," + DATA[i+1] + ") => " + newID +
1034                        ", expected " + expID);
1035            }
1036        }
1037    }
1038
1039    /**
1040     * Test the normalization transliterator.
1041     */
1042    public void TestNormalizationTransliterator() {
1043        // THE FOLLOWING TWO TABLES ARE COPIED FROM android.icu.dev.test.normalizer.BasicTest
1044        // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1045        String[][] CANON = {
1046                // Input               Decomposed            Composed
1047                {"cat",                "cat",                "cat"               },
1048                {"\u00e0ardvark",      "a\u0300ardvark",     "\u00e0ardvark"     },
1049
1050                {"\u1e0a",             "D\u0307",            "\u1e0a"            }, // D-dot_above
1051                {"D\u0307",            "D\u0307",            "\u1e0a"            }, // D dot_above
1052
1053                {"\u1e0c\u0307",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_below dot_above
1054                {"\u1e0a\u0323",       "D\u0323\u0307",      "\u1e0c\u0307"      }, // D-dot_above dot_below
1055                {"D\u0307\u0323",      "D\u0323\u0307",      "\u1e0c\u0307"      }, // D dot_below dot_above
1056
1057                {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
1058                {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
1059
1060                {"\u1E14",             "E\u0304\u0300",      "\u1E14"            }, // E-macron-grave
1061                {"\u0112\u0300",       "E\u0304\u0300",      "\u1E14"            }, // E-macron + grave
1062                {"\u00c8\u0304",       "E\u0300\u0304",      "\u00c8\u0304"      }, // E-grave + macron
1063
1064                {"\u212b",             "A\u030a",            "\u00c5"            }, // angstrom_sign
1065                {"\u00c5",             "A\u030a",            "\u00c5"            }, // A-ring
1066
1067                {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated with 3.0
1068                {"\u00fd\uFB03n",      "y\u0301\uFB03n",     "\u00fd\uFB03n"     }, //updated with 3.0
1069
1070                {"Henry IV",           "Henry IV",           "Henry IV"          },
1071                {"Henry \u2163",       "Henry \u2163",       "Henry \u2163"      },
1072
1073                {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1074                {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1075                {"\uFF76\uFF9E",       "\uFF76\uFF9E",       "\uFF76\uFF9E"      }, // hw_ka + hw_ten
1076                {"\u30AB\uFF9E",       "\u30AB\uFF9E",       "\u30AB\uFF9E"      }, // ka + hw_ten
1077                {"\uFF76\u3099",       "\uFF76\u3099",       "\uFF76\u3099"      }, // hw_ka + ten
1078
1079                {"A\u0300\u0316",      "A\u0316\u0300",      "\u00C0\u0316"      },
1080        };
1081
1082        String[][] COMPAT = {
1083                // Input               Decomposed            Composed
1084                {"\uFB4f",             "\u05D0\u05DC",       "\u05D0\u05DC"      }, // Alef-Lamed vs. Alef, Lamed
1085
1086                {"\u00fdffin",         "y\u0301ffin",        "\u00fdffin"        }, //updated for 3.0
1087                {"\u00fd\uFB03n",      "y\u0301ffin",        "\u00fdffin"        }, // ffi ligature -> f + f + i
1088
1089                {"Henry IV",           "Henry IV",           "Henry IV"          },
1090                {"Henry \u2163",       "Henry IV",           "Henry IV"          },
1091
1092                {"\u30AC",             "\u30AB\u3099",       "\u30AC"            }, // ga (Katakana)
1093                {"\u30AB\u3099",       "\u30AB\u3099",       "\u30AC"            }, // ka + ten
1094
1095                {"\uFF76\u3099",       "\u30AB\u3099",       "\u30AC"            }, // hw_ka + ten
1096        };
1097
1098        Transliterator NFD = Transliterator.getInstance("NFD");
1099        Transliterator NFC = Transliterator.getInstance("NFC");
1100        for (int i=0; i<CANON.length; ++i) {
1101            String in = CANON[i][0];
1102            String expd = CANON[i][1];
1103            String expc = CANON[i][2];
1104            expect(NFD, in, expd);
1105            expect(NFC, in, expc);
1106        }
1107
1108        Transliterator NFKD = Transliterator.getInstance("NFKD");
1109        Transliterator NFKC = Transliterator.getInstance("NFKC");
1110        for (int i=0; i<COMPAT.length; ++i) {
1111            String in = COMPAT[i][0];
1112            String expkd = COMPAT[i][1];
1113            String expkc = COMPAT[i][2];
1114            expect(NFKD, in, expkd);
1115            expect(NFKC, in, expkc);
1116        }
1117
1118        Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
1119        expect(t, "\u010dx", "c\u030C");
1120    }
1121
1122    /**
1123     * Test compound RBT rules.
1124     */
1125    public void TestCompoundRBT() {
1126        // Careful with spacing and ';' here:  Phrase this exactly
1127        // as toRules() is going to return it.  If toRules() changes
1128        // with regard to spacing or ';', then adjust this string.
1129        String rule = "::Hex-Any;\n" +
1130        "::Any-Lower;\n" +
1131        "a > '.A.';\n" +
1132        "b > '.B.';\n" +
1133        "::[^t]Any-Upper;";
1134        Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
1135        if (t == null) {
1136            errln("FAIL: createFromRules failed");
1137            return;
1138        }
1139        expect(t, "\u0043at in the hat, bat on the mat",
1140        "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1141        String r = t.toRules(true);
1142        if (r.equals(rule)) {
1143            logln("OK: toRules() => " + r);
1144        } else {
1145            errln("FAIL: toRules() => " + r +
1146                    ", expected " + rule);
1147        }
1148
1149        // Now test toRules
1150        t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD);
1151        if (t == null) {
1152            errln("FAIL: createInstance failed");
1153            return;
1154        }
1155        String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
1156        r = t.toRules(true);
1157        if (!r.equals(exp)) {
1158            errln("FAIL: toRules() => " + r +
1159                    ", expected " + exp);
1160        } else {
1161            logln("OK: toRules() => " + r);
1162        }
1163
1164        // Round trip the result of toRules
1165        t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD);
1166        if (t == null) {
1167            errln("FAIL: createFromRules #2 failed");
1168            return;
1169        } else {
1170            logln("OK: createFromRules(" + r + ") succeeded");
1171        }
1172
1173        // Test toRules again
1174        r = t.toRules(true);
1175        if (!r.equals(exp)) {
1176            errln("FAIL: toRules() => " + r +
1177                    ", expected " + exp);
1178        } else {
1179            logln("OK: toRules() => " + r);
1180        }
1181
1182        // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1183        // to what the regenerated ID will look like.
1184        String id = "Upper(Lower);(NFKC)";
1185        t = Transliterator.getInstance(id, Transliterator.FORWARD);
1186        if (t == null) {
1187            errln("FAIL: createInstance #2 failed");
1188            return;
1189        }
1190        if (t.getID().equals(id)) {
1191            logln("OK: created " + id);
1192        } else {
1193            errln("FAIL: createInstance(" + id +
1194                    ").getID() => " + t.getID());
1195        }
1196
1197        Transliterator u = t.getInverse();
1198        if (u == null) {
1199            errln("FAIL: createInverse failed");
1200            return;
1201        }
1202        exp = "NFKC();Lower(Upper)";
1203        if (u.getID().equals(exp)) {
1204            logln("OK: createInverse(" + id + ") => " +
1205                    u.getID());
1206        } else {
1207            errln("FAIL: createInverse(" + id + ") => " +
1208                    u.getID());
1209        }
1210    }
1211
1212    /**
     * Compound filter semantics were originally not implemented
1214     * correctly.  Originally, each component filter f(i) is replaced by
1215     * f'(i) = f(i) && g, where g is the filter for the compound
1216     * transliterator.
1217     *
1218     * From Mark:
1219     *
     * Suppose I have a transliterator X. Internally X is
1221     * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1222     *
1223     * The compound should convert all greek characters (through latin) to
1224     * cyrillic, then lowercase the result. The filter should say "don't
1225     * touch 'A' in the original". But because an intermediate result
1226     * happens to go through "A", the Greek Alpha gets hung up.
1227     */
1228    public void TestCompoundFilter() {
1229        Transliterator t = Transliterator.getInstance
1230        ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD);
1231        t.setFilter(new UnicodeSet("[^A]"));
1232
1233        // Only the 'A' at index 1 should remain unchanged
1234        expect(t,
1235                CharsToUnicodeString("BA\\u039A\\u0391"),
1236                CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1237    }
1238
1239    /**
1240     * Test the "Remove" transliterator.
1241     */
1242    public void TestRemove() {
1243        Transliterator t = Transliterator.getInstance("Remove[aeiou]");
1244        expect(t, "The quick brown fox.",
1245        "Th qck brwn fx.");
1246    }
1247
1248    public void TestToRules() {
1249        String RBT = "rbt";
1250        String SET = "set";
1251        String[] DATA = {
1252                RBT,
1253                "$a=\\u4E61; [$a] > A;",
1254                "[\\u4E61] > A;",
1255
1256                RBT,
1257                "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1258                "[[:Zs:][:Zl:]]{a} > A;",
1259
1260                SET,
1261                "[[:Zs:][:Zl:]]",
1262                "[[:Zs:][:Zl:]]",
1263
1264                SET,
1265                "[:Ps:]",
1266                "[:Ps:]",
1267
1268                SET,
1269                "[:L:]",
1270                "[:L:]",
1271
1272                SET,
1273                "[[:L:]-[A]]",
1274                "[[:L:]-[A]]",
1275
1276                SET,
1277                "[~[:Lu:][:Ll:]]",
1278                "[~[:Lu:][:Ll:]]",
1279
1280                SET,
1281                "[~[a-z]]",
1282                "[~[a-z]]",
1283
1284                RBT,
1285                "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1286                "[^[:Zs:]]{a} > A;",
1287
1288                RBT,
1289                "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1290                "[[a-z]-[:Zs:]]{a} > A;",
1291
1292                RBT,
1293                "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1294                "[[:Zs:]&[a-z]]{a} > A;",
1295
1296                RBT,
1297                "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1298                "[x[:Zs:]]{a} > A;",
1299
1300                RBT,
1301                "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+
1302                "$macron = \\u0304 ;"+
1303                "$evowel = [aeiouyAEIOUY] ;"+
1304                "$iotasub = \\u0345 ;"+
1305                "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1306                "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1307
1308                RBT,
1309                "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1310                "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1311        };
1312
1313        for (int d=0; d < DATA.length; d+=3) {
1314            if (DATA[d] == RBT) {
1315                // Transliterator test
1316                Transliterator t = Transliterator.createFromRules("ID",
1317                        DATA[d+1], Transliterator.FORWARD);
1318                if (t == null) {
1319                    errln("FAIL: createFromRules failed");
1320                    return;
1321                }
1322                String rules, escapedRules;
1323                rules = t.toRules(false);
1324                escapedRules = t.toRules(true);
1325                String expRules = Utility.unescape(DATA[d+2]);
1326                String expEscapedRules = DATA[d+2];
1327                if (rules.equals(expRules)) {
1328                    logln("Ok: " + DATA[d+1] +
1329                            " => " + Utility.escape(rules));
1330                } else {
1331                    errln("FAIL: " + DATA[d+1] +
1332                            " => " + Utility.escape(rules + ", exp " + expRules));
1333                }
1334                if (escapedRules.equals(expEscapedRules)) {
1335                    logln("Ok: " + DATA[d+1] +
1336                            " => " + escapedRules);
1337                } else {
1338                    errln("FAIL: " + DATA[d+1] +
1339                            " => " + escapedRules + ", exp " + expEscapedRules);
1340                }
1341
1342            } else {
1343                // UnicodeSet test
1344                String pat = DATA[d+1];
1345                String expToPat = DATA[d+2];
1346                UnicodeSet set = new UnicodeSet(pat);
1347
1348                // Adjust spacing etc. as necessary.
1349                String toPat;
1350                toPat = set.toPattern(true);
1351                if (expToPat.equals(toPat)) {
1352                    logln("Ok: " + pat +
1353                            " => " + toPat);
1354                } else {
1355                    errln("FAIL: " + pat +
1356                            " => " + Utility.escape(toPat) +
1357                            ", exp " + Utility.escape(pat));
1358                }
1359            }
1360        }
1361    }
1362
1363    public void TestContext() {
1364        Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l
1365
1366        expect("de > x; {d}e > y;",
1367                "de",
1368                "ye",
1369                pos);
1370
1371        expect("ab{c} > z;",
1372                "xadabdabcy",
1373        "xadabdabzy");
1374    }
1375
1376    static final String CharsToUnicodeString(String s) {
1377        return Utility.unescape(s);
1378    }
1379
1380    public void TestSupplemental() {
1381
1382        expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" +
1383        "a > $a; $s > i;"),
1384        CharsToUnicodeString("ab\\U0001030Fx"),
1385        CharsToUnicodeString("\\U00010300bix"));
1386
1387        expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" +
1388                "$b=[A-Z\\U00010400-\\U0001044D];" +
1389        "($a)($b) > $2 $1;"),
1390        CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1391        CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1392
1393        // k|ax\\U00010300xm
1394
1395        // k|a\\U00010400\\U00010300xm
1396        // ky|\\U00010400\\U00010300xm
1397        // ky\\U00010400|\\U00010300xm
1398
1399        // ky\\U00010400|\\U00010300\\U00010400m
1400        // ky\\U00010400y|\\U00010400m
1401        expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" +
1402                "$a {x} > | @ \\U00010400;" +
1403        "{$a} [^\\u0000-\\uFFFF] > y;"),
1404        CharsToUnicodeString("kax\\U00010300xm"),
1405        CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1406
1407        expect(Transliterator.getInstance("Any-Name"),
1408                CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1409        "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1410
1411        expect(Transliterator.getInstance("Name-Any"),
1412                "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
1413                CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
1414
1415        expect(Transliterator.getInstance("Any-Hex/Unicode"),
1416                CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1417        "U+10330U+10FF00U+E0061U+00A0");
1418
1419        expect(Transliterator.getInstance("Any-Hex/C"),
1420                CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1421        "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1422
1423        expect(Transliterator.getInstance("Any-Hex/Perl"),
1424                CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1425        "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1426
1427        expect(Transliterator.getInstance("Any-Hex/Java"),
1428                CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1429        "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1430
1431        expect(Transliterator.getInstance("Any-Hex/XML"),
1432                CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1433        "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1434
1435        expect(Transliterator.getInstance("Any-Hex/XML10"),
1436                CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1437        "&#66352;&#1113856;&#917601;&#160;");
1438
1439        expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
1440                CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1441                CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1442    }
1443
1444    public void TestQuantifier() {
1445
1446        // Make sure @ in a quantified anteContext works
1447        expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1448                "AAAAAb",
1449        "aaa(aac)");
1450
1451        // Make sure @ in a quantified postContext works
1452        expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1453                "baaaaa",
1454        "caa(aaa)");
1455
1456        // Make sure @ in a quantified postContext with seg ref works
1457        expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1458                "baaaaa",
1459        "baa(aaa)");
1460
1461        // Make sure @ past ante context doesn't enter ante context
1462        Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5);
1463        expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1464                "xxxab",
1465                "xxx(ac)",
1466                pos);
1467
1468        // Make sure @ past post context doesn't pass limit
1469        Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2);
1470        expect("{b} a+ > c @@ |; x > y; a > A;",
1471                "baxx",
1472                "caxx",
1473                pos2);
1474
1475        // Make sure @ past post context doesn't enter post context
1476        expect("{b} a+ > c @@ |; x > y; a > A;",
1477                "baxx",
1478        "cayy");
1479
1480        expect("(ab)? c > d;",
1481                "c abc ababc",
1482        "d d abd");
1483
1484        // NOTE: The (ab)+ when referenced just yields a single "ab",
1485        // not the full sequence of them.  This accords with perl behavior.
1486        expect("(ab)+ {x} > '(' $1 ')';",
1487                "x abx ababxy",
1488        "x ab(ab) abab(ab)y");
1489
1490        expect("b+ > x;",
1491                "ac abc abbc abbbc",
1492        "ac axc axc axc");
1493
1494        expect("[abc]+ > x;",
1495                "qac abrc abbcs abtbbc",
1496        "qx xrx xs xtx");
1497
1498        expect("q{(ab)+} > x;",
1499                "qa qab qaba qababc qaba",
1500        "qa qx qxa qxc qxa");
1501
1502        expect("q(ab)* > x;",
1503                "qa qab qaba qababc",
1504        "xa x xa xc");
1505
1506        // NOTE: The (ab)+ when referenced just yields a single "ab",
1507        // not the full sequence of them.  This accords with perl behavior.
1508        expect("q(ab)* > '(' $1 ')';",
1509                "qa qab qaba qababc",
1510        "()a (ab) (ab)a (ab)c");
1511
1512        // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1513        // quoted string
1514        expect("'ab'+ > x;",
1515                "bb ab ababb",
1516        "bb x xb");
1517
1518        // $foo+ and $foo* -- the quantifier should apply to the entire
1519        // variable reference
1520        expect("$var = ab; $var+ > x;",
1521                "bb ab ababb",
1522        "bb x xb");
1523    }
1524
1525    static class TestFact implements Transliterator.Factory {
1526        static class NameableNullTrans extends Transliterator {
1527            public NameableNullTrans(String id) {
1528                super(id, null);
1529            }
1530            protected void handleTransliterate(Replaceable text,
1531                    Position offsets, boolean incremental) {
1532                offsets.start = offsets.limit;
1533            }
1534        }
1535        String id;
1536        public TestFact(String theID) {
1537            id = theID;
1538        }
1539        public Transliterator getInstance(String ignoredID) {
1540            return new NameableNullTrans(id);
1541        }
1542    }
1543
1544    public void TestSTV() {
1545        Enumeration es = Transliterator.getAvailableSources();
1546        for (int i=0; es.hasMoreElements(); ++i) {
1547            String source = (String) es.nextElement();
1548            logln("" + i + ": " + source);
1549            if (source.length() == 0) {
1550                errln("FAIL: empty source");
1551                continue;
1552            }
1553            Enumeration et = Transliterator.getAvailableTargets(source);
1554            for (int j=0; et.hasMoreElements(); ++j) {
1555                String target = (String) et.nextElement();
1556                logln(" " + j + ": " + target);
1557                if (target.length() == 0) {
1558                    errln("FAIL: empty target");
1559                    continue;
1560                }
1561                Enumeration ev = Transliterator.getAvailableVariants(source, target);
1562                for (int k=0; ev.hasMoreElements(); ++k) {
1563                    String variant = (String) ev.nextElement();
1564                    if (variant.length() == 0) {
1565                        logln("  " + k + ": <empty>");
1566                    } else {
1567                        logln("  " + k + ": " + variant);
1568                    }
1569                }
1570            }
1571        }
1572
1573        // Test registration
1574        String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1575        String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
1576        String[] SOURCES = { null, "Seoridf", "Oewoir" };
1577        for (int i=0; i<3; ++i) {
1578            Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
1579            try {
1580                Transliterator t = Transliterator.getInstance(IDS[i]);
1581                if (t.getID().equals(IDS[i])) {
1582                    logln("Ok: Registration/creation succeeded for ID " +
1583                            IDS[i]);
1584                } else {
1585                    errln("FAIL: Registration of ID " +
1586                            IDS[i] + " creates ID " + t.getID());
1587                }
1588                Transliterator.unregister(IDS[i]);
1589                try {
1590                    t = Transliterator.getInstance(IDS[i]);
1591                    errln("FAIL: Unregistration failed for ID " +
1592                            IDS[i] + "; still receiving ID " + t.getID());
1593                } catch (IllegalArgumentException e2) {
1594                    // Good; this is what we expect
1595                    logln("Ok; Unregistered " + IDS[i]);
1596                }
1597            } catch (IllegalArgumentException e) {
1598                errln("FAIL: Registration/creation failed for ID " +
1599                        IDS[i]);
1600            } finally {
1601                Transliterator.unregister(IDS[i]);
1602            }
1603        }
1604
1605        // Make sure getAvailable API reflects removal
1606        for (Enumeration e = Transliterator.getAvailableIDs();
1607        e.hasMoreElements(); ) {
1608            String id = (String) e.nextElement();
1609            for (int i=0; i<3; ++i) {
1610                if (id.equals(FULL_IDS[i])) {
1611                    errln("FAIL: unregister(" + id + ") failed");
1612                }
1613            }
1614        }
1615        for (Enumeration e = Transliterator.getAvailableTargets("Any");
1616        e.hasMoreElements(); ) {
1617            String t = (String) e.nextElement();
1618            if (t.equals(IDS[0])) {
1619                errln("FAIL: unregister(Any-" + t + ") failed");
1620            }
1621        }
1622        for (Enumeration e = Transliterator.getAvailableSources();
1623        e.hasMoreElements(); ) {
1624            String s = (String) e.nextElement();
1625            for (int i=0; i<3; ++i) {
1626                if (SOURCES[i] == null) continue;
1627                if (s.equals(SOURCES[i])) {
1628                    errln("FAIL: unregister(" + s + "-*) failed");
1629                }
1630            }
1631        }
1632    }
1633
1634    /**
1635     * Test inverse of Greek-Latin; Title()
1636     */
1637    public void TestCompoundInverse() {
1638        Transliterator t = Transliterator.getInstance
1639        ("Greek-Latin; Title()", Transliterator.REVERSE);
1640        if (t == null) {
1641            errln("FAIL: createInstance");
1642            return;
1643        }
1644        String exp = "(Title);Latin-Greek";
1645        if (t.getID().equals(exp)) {
1646            logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
1647                    t.getID());
1648        } else {
1649            errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
1650                    t.getID() + "\", expected \"" + exp + "\"");
1651        }
1652    }
1653
1654    /**
1655     * Test NFD chaining with RBT
1656     */
1657    public void TestNFDChainRBT() {
1658        Transliterator t = Transliterator.createFromRules(
1659                "TEST", "::NFD; aa > Q; a > q;",
1660                Transliterator.FORWARD);
1661        logln(t.toRules(true));
1662        expect(t, "aa", "Q");
1663    }
1664
1665    /**
1666     * Inverse of "Null" should be "Null". (J21)
1667     */
1668    public void TestNullInverse() {
1669        Transliterator t = Transliterator.getInstance("Null");
1670        Transliterator u = t.getInverse();
1671        if (!u.getID().equals("Null")) {
1672            errln("FAIL: Inverse of Null should be Null");
1673        }
1674    }
1675
1676    /**
1677     * Check ID of inverse of alias. (J22)
1678     */
1679    public void TestAliasInverseID() {
1680        String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
1681        Transliterator t = Transliterator.getInstance(ID);
1682        Transliterator u = t.getInverse();
1683        String exp = "Hangul-Latin";
1684        String got = u.getID();
1685        if (!got.equals(exp)) {
1686            errln("FAIL: Inverse of " + ID + " is " + got +
1687                    ", expected " + exp);
1688        }
1689    }
1690
1691    /**
1692     * Test IDs of inverses of compound transliterators. (J20)
1693     */
1694    public void TestCompoundInverseID() {
1695        String ID = "Latin-Jamo;NFC(NFD)";
1696        Transliterator t = Transliterator.getInstance(ID);
1697        Transliterator u = t.getInverse();
1698        String exp = "NFD(NFC);Jamo-Latin";
1699        String got = u.getID();
1700        if (!got.equals(exp)) {
1701            errln("FAIL: Inverse of " + ID + " is " + got +
1702                    ", expected " + exp);
1703        }
1704    }
1705
1706    /**
1707     * Test undefined variable.
1708     */
1709    public void TestUndefinedVariable() {
1710        String rule = "$initial } a <> \u1161;";
1711        try {
1712            Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD);
1713        } catch (IllegalArgumentException e) {
1714            logln("OK: Got exception for " + rule + ", as expected: " +
1715                    e.getMessage());
1716            return;
1717        }
1718        errln("Fail: bogus rule " + rule + " compiled without error");
1719    }
1720
1721    /**
1722     * Test empty context.
1723     */
1724    public void TestEmptyContext() {
1725        expect(" { a } > b;", "xay a ", "xby b ");
1726    }
1727
1728    /**
1729     * Test compound filter ID syntax
1730     */
1731    public void TestCompoundFilterID() {
1732        String[] DATA = {
1733                // Col. 1 = ID or rule set (latter must start with #)
1734
1735                // = columns > 1 are null if expect col. 1 to be illegal =
1736
1737                // Col. 2 = direction, "F..." or "R..."
1738                // Col. 3 = source string
1739                // Col. 4 = exp result
1740
1741                "[abc]; [abc]", null, null, null, // multiple filters
1742                "Latin-Greek; [abc];", null, null, null, // misplaced filter
1743                "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c",
1744                "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1745                "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c",
1746                "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393",
1747        };
1748
1749        for (int i=0; i<DATA.length; i+=4) {
1750            String id = DATA[i];
1751            int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ?
1752                    Transliterator.REVERSE : Transliterator.FORWARD;
1753            String source = DATA[i+2];
1754            String exp = DATA[i+3];
1755            boolean expOk = (DATA[i+1] != null);
1756            Transliterator t = null;
1757            IllegalArgumentException e = null;
1758            try {
1759                if (id.charAt(0) == '#') {
1760                    t = Transliterator.createFromRules("ID", id, direction);
1761                } else {
1762                    t = Transliterator.getInstance(id, direction);
1763                }
1764            } catch (IllegalArgumentException ee) {
1765                e = ee;
1766            }
1767            boolean ok = (t != null && e == null);
1768            if (ok == expOk) {
1769                logln("Ok: " + id + " => " + t +
1770                        (e != null ? (", " + e.getMessage()) : ""));
1771                if (source != null) {
1772                    expect(t, source, exp);
1773                }
1774            } else {
1775                errln("FAIL: " + id + " => " + t +
1776                        (e != null ? (", " + e.getMessage()) : ""));
1777            }
1778        }
1779    }
1780
1781    /**
1782     * Test new property set syntax
1783     */
1784    public void TestPropertySet() {
1785        expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
1786        expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
1787        "[ a stitch ]\n[ in time ]\r[ saves 9]");
1788    }
1789
1790    /**
1791     * Test various failure points of the new 2.0 engine.
1792     */
1793    public void TestNewEngine() {
1794        Transliterator t = Transliterator.getInstance("Latin-Hiragana");
1795        // Katakana should be untouched
1796        expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
1797
1798        if (true) {
1799            // This test will only work if Transliterator.ROLLBACK is
1800            // true.  Otherwise, this test will fail, revealing a
1801            // limitation of global filters in incremental mode.
1802
1803            Transliterator a =
1804                Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD);
1805            Transliterator A =
1806                Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD);
1807
1808            //Transliterator array[] = new Transliterator[] {
1809            //    a,
1810            //    Transliterator.getInstance("NFD"),
1811            //    A };
1812            //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
1813
1814            try {
1815                Transliterator.registerInstance(a);
1816                Transliterator.registerInstance(A);
1817
1818                t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b");
1819                expect(t, "aAaA", "bAbA");
1820
1821                Transliterator[] u = t.getElements();
1822                assertTrue("getElements().length", u.length == 3);
1823                assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
1824                assertEquals("getElements()[1]", u[1].getID(), "NFD");
1825                assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
1826
1827                t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
1828                t.setFilter(new UnicodeSet("[:Ll:]"));
1829                expect(t, "aAaA", "bAbA");
1830            } finally {
1831                Transliterator.unregister("a_to_A");
1832                Transliterator.unregister("A_to_b");
1833            }
1834        }
1835
1836        expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
1837                "a",
1838        "ax");
1839
1840        String gr =
1841            "$ddot = \u0308 ;" +
1842            "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" +
1843            "$rough = \u0314 ;" +
1844            "($lcgvowel+ $ddot?) $rough > h | $1 ;" +
1845            "\u03b1 <> a ;" +
1846            "$rough <> h ;";
1847
1848        expect(gr, "\u03B1\u0314", "ha");
1849    }
1850
1851    /**
1852     * Test quantified segment behavior.  We want:
1853     * ([abc])+ > x $1 x; applied to "cba" produces "xax"
1854     */
1855    public void TestQuantifiedSegment() {
1856        // The normal case
1857        expect("([abc]+) > x $1 x;", "cba", "xcbax");
1858
1859        // The tricky case; the quantifier is around the segment
1860        expect("([abc])+ > x $1 x;", "cba", "xax");
1861
1862        // Tricky case in reverse direction
1863        expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
1864
1865        // Check post-context segment
1866        expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
1867
1868        // Test toRule/toPattern for non-quantified segment.
1869        // Careful with spacing here.
1870        String r = "([a-c]){q} > x $1 x;";
1871        Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1872        String rr = t.toRules(true);
1873        if (!r.equals(rr)) {
1874            errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1875        } else {
1876            logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1877        }
1878
1879        // Test toRule/toPattern for quantified segment.
1880        // Careful with spacing here.
1881        r = "([a-c])+{q} > x $1 x;";
1882        t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD);
1883        rr = t.toRules(true);
1884        if (!r.equals(rr)) {
1885            errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1886        } else {
1887            logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1888        }
1889    }
1890
1891    //======================================================================
1892    // Ram's tests
1893    //======================================================================
    /*
     * This test exercises the ISO 15919 romanization rules: each Latin
     * string in source[] must transliterate to the Devanagari string at the
     * same index in expected[], and back again.
     */
    public void  TestDevanagariLatinRT(){
        // Latin input; must stay index-aligned with expected[] below.
        String[]  source = {
                "bh\u0101rata",
                "kra",
                "k\u1E63a",
                "khra",
                "gra",
                "\u1E45ra",
                "cra",
                "chra",
                "j\u00F1a",
                "jhra",
                "\u00F1ra",
                "\u1E6Dya",
                "\u1E6Dhra",
                "\u1E0Dya",
                //"r\u0323ya", // \u095c is not valid in Devanagari
                "\u1E0Dhya",
                "\u1E5Bhra",
                "\u1E47ra",
                "tta",
                "thra",
                "dda",
                "dhra",
                "nna",
                "pra",
                "phra",
                "bra",
                "bhra",
                "mra",
                "\u1E49ra",
                //"l\u0331ra",
                "yra",
                "\u1E8Fra",
                //"l-",
                "vra",
                "\u015Bra",
                "\u1E63ra",
                "sra",
                "hma",
                "\u1E6D\u1E6Da",
                "\u1E6D\u1E6Dha",
                "\u1E6Dh\u1E6Dha",
                "\u1E0D\u1E0Da",
                "\u1E0D\u1E0Dha",
                "\u1E6Dya",
                "\u1E6Dhya",
                "\u1E0Dya",
                "\u1E0Dhya",
                // Not roundtrippable --
                // \u0939\u094d\u094d\u092E  - hma
                // \u0939\u094d\u092E         - hma
                // CharsToUnicodeString("hma"),
                "hya",
                "\u015Br\u0325",
                "\u015Bca",
                "\u0115",
                "san\u0304j\u012Bb s\u0113nagupta",
                "\u0101nand vaddir\u0101ju",
        };
        // Devanagari counterparts; entries commented out here mirror the
        // entries commented out in source[] so the indices keep matching.
        String[]  expected = {
                "\u092D\u093E\u0930\u0924",    /* bha\u0304rata */
                "\u0915\u094D\u0930",          /* kra         */
                "\u0915\u094D\u0937",          /* ks\u0323a  */
                "\u0916\u094D\u0930",          /* khra        */
                "\u0917\u094D\u0930",          /* gra         */
                "\u0919\u094D\u0930",          /* n\u0307ra  */
                "\u091A\u094D\u0930",          /* cra         */
                "\u091B\u094D\u0930",          /* chra        */
                "\u091C\u094D\u091E",          /* jn\u0303a  */
                "\u091D\u094D\u0930",          /* jhra        */
                "\u091E\u094D\u0930",          /* n\u0303ra  */
                "\u091F\u094D\u092F",          /* t\u0323ya  */
                "\u0920\u094D\u0930",          /* t\u0323hra */
                "\u0921\u094D\u092F",          /* d\u0323ya  */
                //"\u095C\u094D\u092F",          /* r\u0323ya  */ // \u095c is not valid in Devanagari
                "\u0922\u094D\u092F",          /* d\u0323hya */
                "\u0922\u093C\u094D\u0930",    /* r\u0323hra */
                "\u0923\u094D\u0930",          /* n\u0323ra  */
                "\u0924\u094D\u0924",          /* tta         */
                "\u0925\u094D\u0930",          /* thra        */
                "\u0926\u094D\u0926",          /* dda         */
                "\u0927\u094D\u0930",          /* dhra        */
                "\u0928\u094D\u0928",          /* nna         */
                "\u092A\u094D\u0930",          /* pra         */
                "\u092B\u094D\u0930",          /* phra        */
                "\u092C\u094D\u0930",          /* bra         */
                "\u092D\u094D\u0930",          /* bhra        */
                "\u092E\u094D\u0930",          /* mra         */
                "\u0929\u094D\u0930",          /* n\u0331ra  */
                //"\u0934\u094D\u0930",          /* l\u0331ra  */
                "\u092F\u094D\u0930",          /* yra         */
                "\u092F\u093C\u094D\u0930",    /* y\u0307ra  */
                //"l-",
                "\u0935\u094D\u0930",          /* vra         */
                "\u0936\u094D\u0930",          /* s\u0301ra  */
                "\u0937\u094D\u0930",          /* s\u0323ra  */
                "\u0938\u094D\u0930",          /* sra         */
                "\u0939\u094d\u092E",          /* hma         */
                "\u091F\u094D\u091F",          /* t\u0323t\u0323a  */
                "\u091F\u094D\u0920",          /* t\u0323t\u0323ha */
                "\u0920\u094D\u0920",          /* t\u0323ht\u0323ha*/
                "\u0921\u094D\u0921",          /* d\u0323d\u0323a  */
                "\u0921\u094D\u0922",          /* d\u0323d\u0323ha */
                "\u091F\u094D\u092F",          /* t\u0323ya  */
                "\u0920\u094D\u092F",          /* t\u0323hya */
                "\u0921\u094D\u092F",          /* d\u0323ya  */
                "\u0922\u094D\u092F",          /* d\u0323hya */
                // "hma",                         /* hma         */
                "\u0939\u094D\u092F",          /* hya         */
                "\u0936\u0943",                /* s\u0301r\u0325a  */
                "\u0936\u094D\u091A",          /* s\u0301ca  */
                "\u090d",                      /* e\u0306    */
                "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
                "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941",
        };

        Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD );
        Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);

        // Check every pair in both directions.
        for(int i= 0; i<source.length; i++){
            expect(latinToDev,(source[i]),(expected[i]));
            expect(devToLatin,(expected[i]),(source[i]));
        }

    }
2021    public void  TestTeluguLatinRT(){
2022        String[]  source = {
2023                "raghur\u0101m vi\u015Bvan\u0101dha",                           /* Raghuram Viswanadha    */
2024                "\u0101nand vaddir\u0101ju",                                    /* Anand Vaddiraju        */
2025                "r\u0101j\u012Bv ka\u015Barab\u0101da",                         /* Rajeev Kasarabada      */
2026                "san\u0304j\u012Bv ka\u015Barab\u0101da",                       /* sanjeev kasarabada     */
2027                "san\u0304j\u012Bb sen'gupta",                                  /* sanjib sengupata       */
2028                "amar\u0113ndra hanum\u0101nula",                               /* Amarendra hanumanula   */
2029                "ravi kum\u0101r vi\u015Bvan\u0101dha",                         /* Ravi Kumar Viswanadha  */
2030                "\u0101ditya kandr\u0113gula",                                  /* Aditya Kandregula      */
2031                "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di",      /* Shridhar Kantamsetty   */
2032                "m\u0101dhav de\u015Be\u1E6D\u1E6Di"                            /* Madhav Desetty         */
2033        };
2034
2035        String[]  expected = {
2036                "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2037                "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
2038                "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2039                "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
2040                "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
2041                "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
2042                "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
2043                "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
2044                "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2045                "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
2046        };
2047
2048
2049        Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD);
2050        Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD);
2051
2052        for(int i= 0; i<source.length; i++){
2053            expect(latinToDev,(source[i]),(expected[i]));
2054            expect(devToLatin,(expected[i]),(source[i]));
2055        }
2056    }
2057
2058    public void  TestSanskritLatinRT(){
2059        int MAX_LEN =15;
2060        String[]  source = {
2061                "rmk\u1E63\u0113t",
2062                "\u015Br\u012Bmad",
2063                "bhagavadg\u012Bt\u0101",
2064                "adhy\u0101ya",
2065                "arjuna",
2066                "vi\u1E63\u0101da",
2067                "y\u014Dga",
2068                "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2069                "uv\u0101cr\u0325",
2070                "dharmak\u1E63\u0113tr\u0113",
2071                "kuruk\u1E63\u0113tr\u0113",
2072                "samav\u0113t\u0101",
2073                "yuyutsava\u1E25",
2074                "m\u0101mak\u0101\u1E25",
2075                // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2076                "kimakurvata",
2077                "san\u0304java",
2078        };
2079        String[]  expected = {
2080                "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2081                "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2082                "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2083                "\u0905\u0927\u094d\u092f\u093e\u092f",
2084                "\u0905\u0930\u094d\u091c\u0941\u0928",
2085                "\u0935\u093f\u0937\u093e\u0926",
2086                "\u092f\u094b\u0917",
2087                "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2088                "\u0909\u0935\u093E\u091A\u0943",
2089                "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2090                "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2091                "\u0938\u092e\u0935\u0947\u0924\u093e",
2092                "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2093                "\u092e\u093e\u092e\u0915\u093e\u0903",
2094                //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2095                "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2096                "\u0938\u0902\u091c\u0935",
2097        };
2098
2099        Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD);
2100        Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD);
2101        for(int i= 0; i<MAX_LEN; i++){
2102            expect(latinToDev,(source[i]),(expected[i]));
2103            expect(devToLatin,(expected[i]),(source[i]));
2104        }
2105    }
2106
2107    public void  TestCompoundLatinRT(){
2108        int MAX_LEN =15;
2109        String[]  source = {
2110                "rmk\u1E63\u0113t",
2111                "\u015Br\u012Bmad",
2112                "bhagavadg\u012Bt\u0101",
2113                "adhy\u0101ya",
2114                "arjuna",
2115                "vi\u1E63\u0101da",
2116                "y\u014Dga",
2117                "dhr\u0325tar\u0101\u1E63\u1E6Dra",
2118                "uv\u0101cr\u0325",
2119                "dharmak\u1E63\u0113tr\u0113",
2120                "kuruk\u1E63\u0113tr\u0113",
2121                "samav\u0113t\u0101",
2122                "yuyutsava\u1E25",
2123                "m\u0101mak\u0101\u1E25",
2124                // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
2125                "kimakurvata",
2126                "san\u0304java"
2127        };
2128        String[]  expected = {
2129                "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
2130                "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
2131                "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
2132                "\u0905\u0927\u094d\u092f\u093e\u092f",
2133                "\u0905\u0930\u094d\u091c\u0941\u0928",
2134                "\u0935\u093f\u0937\u093e\u0926",
2135                "\u092f\u094b\u0917",
2136                "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
2137                "\u0909\u0935\u093E\u091A\u0943",
2138                "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2139                "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
2140                "\u0938\u092e\u0935\u0947\u0924\u093e",
2141                "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
2142                "\u092e\u093e\u092e\u0915\u093e\u0903",
2143                //  "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
2144                "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
2145                "\u0938\u0902\u091c\u0935"
2146        };
2147
2148        Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD);
2149        Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD);
2150        for(int i= 0; i<MAX_LEN; i++){
2151            expect(latinToDevToLatin,(source[i]),(source[i]));
2152            expect(devToLatinToDev,(expected[i]),(expected[i]));
2153        }
2154    }
2155    /**
2156     * Test Gurmukhi-Devanagari Tippi and Bindi
2157     */
2158    public void TestGurmukhiDevanagari(){
2159        // the rule says:
2160        // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2161        // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2162
2163        UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
2164        UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]");
2165
2166        UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
2167        UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
2168        Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi");
2169        StringBuffer src = new StringBuffer(" \u0902");
2170        StringBuffer expect = new StringBuffer(" \u0A02");
2171        while(vIter.next()){
2172            src.setCharAt(0,(char) vIter.codepoint);
2173            expect.setCharAt(0,(char) (vIter.codepoint+0x0100));
2174            expect(trans,src.toString(),expect.toString());
2175        }
2176
2177        expect.setCharAt(1,'\u0A70');
2178        while(nvIter.next()){
2179            //src.setCharAt(0,(char) nvIter.codepoint);
2180            src.setCharAt(0,(char)nvIter.codepoint);
2181            expect.setCharAt(0,(char) (nvIter.codepoint+0x0100));
2182            expect(trans,src.toString(),expect.toString());
2183        }
2184    }
2185    /**
2186     * Test instantiation from a locale.
2187     */
2188    public void TestLocaleInstantiation() {
2189        Transliterator t;
2190        try{
2191            t = Transliterator.getInstance("te_IN-Latin");
2192            //expect(t, "\u0430", "a");
2193        }catch(IllegalArgumentException ex){
2194            warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage());
2195        }
2196        try{
2197            t = Transliterator.getInstance("ru_RU-Latin");
2198            expect(t, "\u0430", "a");
2199        }catch(IllegalArgumentException ex){
2200            warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage());
2201        }
2202        try{
2203            t = Transliterator.getInstance("en-el");
2204            expect(t, "a", "\u03B1");
2205        }catch(IllegalArgumentException ex){
2206            warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage());
2207        }
2208    }
2209
2210    /**
2211     * Test title case handling of accent (should ignore accents)
2212     */
2213    public void TestTitleAccents() {
2214        Transliterator t = Transliterator.getInstance("Title");
2215        expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
2216    }
2217
2218    /**
2219     * Basic test of a locale resource based rule.
2220     */
2221    public void TestLocaleResource() {
2222        String DATA[] = {
2223                // id                    from             to
2224                "Latin-Greek/UNGEGN",    "b",             "\u03bc\u03c0",
2225                "Latin-el",              "b",             "\u03bc\u03c0",
2226                "Latin-Greek",           "b",             "\u03B2",
2227                "Greek-Latin/UNGEGN",    "\u03B2",        "v",
2228                "el-Latin",              "\u03B2",        "v",
2229                "Greek-Latin",           "\u03B2",        "b",
2230        };
2231        for (int i=0; i<DATA.length; i+=3) {
2232            Transliterator t = Transliterator.getInstance(DATA[i]);
2233            expect(t, DATA[i+1], DATA[i+2]);
2234        }
2235    }
2236
2237    /**
2238     * Make sure parse errors reference the right line.
2239     */
2240    public void TestParseError() {
2241        String rule =
2242            "a > b;\n" +
2243            "# more stuff\n" +
2244            "d << b;";
2245        try {
2246            Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2247            if(t!=null){
2248                errln("FAIL: Did not get expected exception");
2249            }
2250        } catch (IllegalArgumentException e) {
2251            String err = e.getMessage();
2252            if (err.indexOf("d << b") >= 0) {
2253                logln("Ok: " + err);
2254            } else {
2255                errln("FAIL: " + err);
2256            }
2257            return;
2258        }
2259        errln("FAIL: no syntax error");
2260    }
2261
2262    /**
2263     * Make sure sets on output are disallowed.
2264     */
2265    public void TestOutputSet() {
2266        String rule = "$set = [a-cm-n]; b > $set;";
2267        Transliterator t = null;
2268        try {
2269            t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2270            if(t!=null){
2271                errln("FAIL: Did not get the expected exception");
2272            }
2273        } catch (IllegalArgumentException e) {
2274            logln("Ok: " + e.getMessage());
2275            return;
2276        }
2277        errln("FAIL: No syntax error");
2278    }
2279
2280    /**
2281     * Test the use variable range pragma, making sure that use of
2282     * variable range characters is detected and flagged as an error.
2283     */
2284    public void TestVariableRange() {
2285        String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2286        try {
2287            Transliterator t =
2288                Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2289            if(t!=null){
2290                errln("FAIL: Did not get the expected exception");
2291            }
2292        } catch (IllegalArgumentException e) {
2293            logln("Ok: " + e.getMessage());
2294            return;
2295        }
2296        errln("FAIL: No syntax error");
2297    }
2298
2299    /**
2300     * Test invalid post context error handling
2301     */
2302    public void TestInvalidPostContext() {
2303        try {
2304            Transliterator t =
2305                Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD);
2306            if(t!=null){
2307                errln("FAIL: Did not get the expected exception");
2308            }
2309        } catch (IllegalArgumentException e) {
2310            String msg = e.getMessage();
2311            if (msg.indexOf("a}b{c") >= 0) {
2312                logln("Ok: " + msg);
2313            } else {
2314                errln("FAIL: " + msg);
2315            }
2316            return;
2317        }
2318        errln("FAIL: No syntax error");
2319    }
2320
2321    /**
2322     * Test ID form variants
2323     */
2324    public void TestIDForms() {
2325        String DATA[] = {
2326                "NFC", null, "NFD",
2327                "nfd", null, "NFC", // make sure case is ignored
2328                "Any-NFKD", null, "Any-NFKC",
2329                "Null", null, "Null",
2330                "-nfkc", "nfkc", "NFKD",
2331                "-nfkc/", "nfkc", "NFKD",
2332                "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
2333                "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
2334                "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
2335                "Source-", null, null,
2336                "Source/Variant-", null, null,
2337                "Source-/Variant", null, null,
2338                "/Variant", null, null,
2339                "/Variant-", null, null,
2340                "-/Variant", null, null,
2341                "-/", null, null,
2342                "-", null, null,
2343                "/", null, null,
2344        };
2345
2346        for (int i=0; i<DATA.length; i+=3) {
2347            String ID = DATA[i];
2348            String expID = DATA[i+1];
2349            String expInvID = DATA[i+2];
2350            boolean expValid = (expInvID != null);
2351            if (expID == null) {
2352                expID = ID;
2353            }
2354            try {
2355                Transliterator t =
2356                    Transliterator.getInstance(ID);
2357                Transliterator u = t.getInverse();
2358                if (t.getID().equals(expID) &&
2359                        u.getID().equals(expInvID)) {
2360                    logln("Ok: " + ID + ".getInverse() => " + expInvID);
2361                } else {
2362                    errln("FAIL: getInstance(" + ID + ") => " +
2363                            t.getID() + " x getInverse() => " + u.getID() +
2364                            ", expected " + expInvID);
2365                }
2366            } catch (IllegalArgumentException e) {
2367                if (!expValid) {
2368                    logln("Ok: getInstance(" + ID + ") => " + e.getMessage());
2369                } else {
2370                    errln("FAIL: getInstance(" + ID + ") => " + e.getMessage());
2371                }
2372            }
2373        }
2374    }
2375
2376    void checkRules(String label, Transliterator t2, String testRulesForward) {
2377        String rules2 = t2.toRules(true);
2378        //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2379        rules2 = TestUtility.replace(rules2, " ", "");
2380        rules2 = TestUtility.replace(rules2, "\n", "");
2381        rules2 = TestUtility.replace(rules2, "\r", "");
2382        testRulesForward = TestUtility.replace(testRulesForward, " ", "");
2383
2384        if (!rules2.equals(testRulesForward)) {
2385            errln(label);
2386            logln("GENERATED RULES: " + rules2);
2387            logln("SHOULD BE:       " + testRulesForward);
2388        }
2389    }
2390
2391    /**
2392     * Mark's toRules test.
2393     */
2394    public void TestToRulesMark() {
2395
2396        String testRules =
2397            "::[[:Latin:][:Mark:]];"
2398            + "::NFKD (NFC);"
2399            + "::Lower (Lower);"
2400            + "a <> \\u03B1;" // alpha
2401            + "::NFKC (NFD);"
2402            + "::Upper (Lower);"
2403            + "::Lower ();"
2404            + "::([[:Greek:][:Mark:]]);"
2405            ;
2406        String testRulesForward =
2407            "::[[:Latin:][:Mark:]];"
2408            + "::NFKD(NFC);"
2409            + "::Lower(Lower);"
2410            + "a > \\u03B1;"
2411            + "::NFKC(NFD);"
2412            + "::Upper (Lower);"
2413            + "::Lower ();"
2414            ;
2415        String testRulesBackward =
2416            "::[[:Greek:][:Mark:]];"
2417            + "::Lower (Upper);"
2418            + "::NFD(NFKC);"
2419            + "\\u03B1 > a;"
2420            + "::Lower(Lower);"
2421            + "::NFC(NFKD);"
2422            ;
2423        String source = "\u00E1"; // a-acute
2424        String target = "\u03AC"; // alpha-acute
2425
2426        Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD);
2427        Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE);
2428
2429        expect(t2, source, target);
2430        expect(t3, target, source);
2431
2432        checkRules("Failed toRules FORWARD", t2, testRulesForward);
2433        checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
2434    }
2435
2436    /**
2437     * Test Escape and Unescape transliterators.
2438     */
2439    public void TestEscape() {
2440        expect(Transliterator.getInstance("Hex-Any"),
2441                "\\x{40}\\U00000031&#x32;&#81;",
2442        "@12Q");
2443        expect(Transliterator.getInstance("Any-Hex/C"),
2444                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2445        "\\u0041\\U0010BEEF\\uFEED");
2446        expect(Transliterator.getInstance("Any-Hex/Java"),
2447                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2448        "\\u0041\\uDBEF\\uDEEF\\uFEED");
2449        expect(Transliterator.getInstance("Any-Hex/Perl"),
2450                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2451        "\\x{41}\\x{10BEEF}\\x{FEED}");
2452    }
2453
2454    /**
2455     * Make sure display names of variants look reasonable.
2456     */
2457    public void TestDisplayName() {
2458        String DATA[] = {
2459                // ID, forward name, reverse name
2460                // Update the text as necessary -- the important thing is
2461                // not the text itself, but how various cases are handled.
2462
2463                // Basic test
2464                "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2465
2466                // Variants
2467                "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
2468
2469                // Target-only IDs
2470                "NFC", "Any to NFC", "Any to NFD",
2471        };
2472
2473        Locale US = Locale.US;
2474
2475        for (int i=0; i<DATA.length; i+=3) {
2476            String name = Transliterator.getDisplayName(DATA[i], US);
2477            if (!name.equals(DATA[i+1])) {
2478                errln("FAIL: " + DATA[i] + ".getDisplayName() => " +
2479                        name + ", expected " + DATA[i+1]);
2480            } else {
2481                logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
2482            }
2483            Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE);
2484            name = Transliterator.getDisplayName(t.getID(), US);
2485            if (!name.equals(DATA[i+2])) {
2486                errln("FAIL: " + t.getID() + ".getDisplayName() => " +
2487                        name + ", expected " + DATA[i+2]);
2488            } else {
2489                logln("Ok: " + t.getID() + ".getDisplayName() => " + name);
2490            }
2491
2492            // Cover getDisplayName(String)
2493            ULocale save = ULocale.getDefault();
2494            ULocale.setDefault(ULocale.US);
2495            String name2 = Transliterator.getDisplayName(t.getID());
2496            if (!name.equals(name2))
2497                errln("FAIL: getDisplayName with default locale failed");
2498            ULocale.setDefault(save);
2499        }
2500    }
2501
2502    /**
2503     * Test anchor masking
2504     */
2505    public void TestAnchorMasking() {
2506        String rule = "^a > Q; a > q;";
2507        try {
2508            Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD);
2509            if(t==null){
2510                errln("FAIL: Did not get the expected exception");
2511            }
2512        } catch (IllegalArgumentException e) {
2513            errln("FAIL: " + rule + " => " + e);
2514        }
2515    }
2516
2517    /**
2518     * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
2519     * during ICU4J modularization to remove dependency of tests on Transliterator.
2520     */
2521    public void TestScriptAllCodepoints(){
2522        int code;
2523        HashSet  scriptIdsChecked   = new HashSet();
2524        HashSet  scriptAbbrsChecked = new HashSet();
2525        for( int i =0; i <= 0x10ffff; i++){
2526            code = UScript.getScript(i);
2527            if(code==UScript.INVALID_CODE){
2528                errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
2529            }
2530            String id =UScript.getName(code);
2531            String abbr = UScript.getShortName(code);
2532            if (!scriptIdsChecked.contains(id)) {
2533                scriptIdsChecked.add(id);
2534                String newId ="[:"+id+":];NFD";
2535                try{
2536                    Transliterator t = Transliterator.getInstance(newId);
2537                    if(t==null){
2538                        errln("Failed to create transliterator for "+hex(i)+
2539                                " script code: " +id);
2540                    }
2541                }catch(Exception e){
2542                    errln("Failed to create transliterator for "+hex(i)
2543                            +" script code: " +id
2544                            + " Exception: "+e.getMessage());
2545                }
2546            }
2547            if (!scriptAbbrsChecked.contains(abbr)) {
2548                scriptAbbrsChecked.add(abbr);
2549                String newAbbrId ="[:"+abbr+":];NFD";
2550                try{
2551                    Transliterator t = Transliterator.getInstance(newAbbrId);
2552                    if(t==null){
2553                        errln("Failed to create transliterator for "+hex(i)+
2554                                " script code: " +abbr);
2555                    }
2556                }catch(Exception e){
2557                    errln("Failed to create transliterator for "+hex(i)
2558                            +" script code: " +abbr
2559                            + " Exception: "+e.getMessage());
2560                }
2561            }
2562        }
2563    }
2564
2565
    // Pairs of { transliterator ID, rule text }.  TestSpecialCases builds a
    // transliterator from each pair, registers it through DummyFactory, and
    // unregisters the IDs again when it is done.
    static final String[][] registerRules = {
        {"Any-Dev1", "x > X; y > Y;"},
        {"Any-Dev2", "XY > Z"},
        {"Greek-Latin/FAKE",
            "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+
            "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+
            "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+
            "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;"
        },
    };
2576
    // Two supplementary (above U+FFFF) code points as Strings; the casing test
    // data in this file uses them as an uppercase / lowercase pair.
    static final String DESERET_DEE = UTF16.valueOf(0x10414);
    static final String DESERET_dee = UTF16.valueOf(0x1043C);
2579
2580    static final String[][] testCases = {
2581
2582        // NORMALIZATION
2583        // should add more test cases
2584        {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2585        {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2586        {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2587        {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"},
2588
2589        // mp -> b BUG
2590        {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"},
2591        {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"},
2592
2593        // check for devanagari bug
2594        {"nfd;Dev1;Dev2;nfc", "xy", "Z"},
2595
2596        // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
2597        {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2598            "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2599            //TODO: enable this test once Titlecase works right
2600            //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2601            //          "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2602
2603            {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2604                "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE},
2605                {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2606                    "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee},
2607
2608                    {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2609                    {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE},
2610
2611                    // FORMS OF S
2612                    {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2613                    {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2614                    {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"},
2615                    {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"},
2616
2617                    // Tatiana bug
2618                    // Upper: TAT\u02B9\u00C2NA
2619                    // Lower: tat\u02B9\u00E2na
2620                    // Title: Tat\u02B9\u00E2na
2621                    {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"},
2622                    {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"},
2623                    {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"},
2624    };
2625
2626    public void TestSpecialCases() {
2627
2628        for (int i = 0; i < registerRules.length; ++i) {
2629            Transliterator t = Transliterator.createFromRules(registerRules[i][0],
2630                    registerRules[i][1], Transliterator.FORWARD);
2631            DummyFactory.add(registerRules[i][0], t);
2632        }
2633        for (int i = 0; i < testCases.length; ++i) {
2634            String name = testCases[i][0];
2635            Transliterator t = Transliterator.getInstance(name);
2636            String id = t.getID();
2637            String source = testCases[i][1];
2638            String target = null;
2639
2640            // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
2641
2642            if (testCases[i].length > 2)    target = testCases[i][2];
2643            else if (id.equalsIgnoreCase("NFD"))    target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFD);
2644            else if (id.equalsIgnoreCase("NFC"))    target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFC);
2645            else if (id.equalsIgnoreCase("NFKD"))   target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFKD);
2646            else if (id.equalsIgnoreCase("NFKC"))   target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFKC);
2647            else if (id.equalsIgnoreCase("Lower"))  target = UCharacter.toLowerCase(Locale.US, source);
2648            else if (id.equalsIgnoreCase("Upper"))  target = UCharacter.toUpperCase(Locale.US, source);
2649
2650            expect(t, source, target);
2651        }
2652        for (int i = 0; i < registerRules.length; ++i) {
2653            Transliterator.unregister(registerRules[i][0]);
2654        }
2655    }
2656
2657    // seems like there should be an easier way to just register an instance of a transliterator
2658
2659    static class DummyFactory implements Transliterator.Factory {
2660        static DummyFactory singleton = new DummyFactory();
2661        static HashMap m = new HashMap();
2662
2663        // Since Transliterators are immutable, we don't have to clone on set & get
2664        static void add(String ID, Transliterator t) {
2665            m.put(ID, t);
2666            //System.out.println("Registering: " + ID + ", " + t.toRules(true));
2667            Transliterator.registerFactory(ID, singleton);
2668        }
2669        public Transliterator getInstance(String ID) {
2670            return (Transliterator) m.get(ID);
2671        }
2672    }
2673
2674    public void TestCasing() {
2675        Transliterator toLower = Transliterator.getInstance("lower");
2676        Transliterator toCasefold = Transliterator.getInstance("casefold");
2677        Transliterator toUpper = Transliterator.getInstance("upper");
2678        Transliterator toTitle = Transliterator.getInstance("title");
2679        for (int i = 0; i < 0x600; ++i) {
2680            String s = UTF16.valueOf(i);
2681
2682            String lower = UCharacter.toLowerCase(ULocale.ROOT, s);
2683            assertEquals("Lowercase", lower, toLower.transform(s));
2684
2685            String casefold = UCharacter.foldCase(s, true);
2686            assertEquals("Casefold", casefold, toCasefold.transform(s));
2687
2688            String title = UCharacter.toTitleCase(ULocale.ROOT, s, null);
2689            assertEquals("Title", title, toTitle.transform(s));
2690
2691            String upper = UCharacter.toUpperCase(ULocale.ROOT, s);
2692            assertEquals("Upper", upper, toUpper.transform(s));
2693        }
2694    }
2695
2696    public void TestSurrogateCasing () {
2697        // check that casing handles surrogates
2698        // titlecase is currently defective
2699        int dee = UTF16.charAt(DESERET_dee,0);
2700        int DEE = UCharacter.toTitleCase(dee);
2701        if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
2702            errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16));
2703        }
2704
2705        if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) {
2706            errln("Fails uppercase of surrogates");
2707        }
2708
2709        if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) {
2710            errln("Fails lowercase of surrogates");
2711        }
2712    }
2713
2714    // Check to see that incremental gets at least part way through a reasonable string.
2715
2716    public void TestIncrementalProgress() {
2717        String latinTest = "The Quick Brown Fox.";
2718        String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest);
2719        String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest);
2720        String[][] tests = {
2721                {"Any", latinTest},
2722                {"Latin", latinTest},
2723                {"Halfwidth", latinTest},
2724                {"Devanagari", devaTest},
2725                {"Katakana", kataTest},
2726        };
2727
2728        Enumeration sources = Transliterator.getAvailableSources();
2729        while(sources.hasMoreElements()) {
2730            String source = (String) sources.nextElement();
2731            String test = findMatch(source, tests);
2732            if (test == null) {
2733                logln("Skipping " + source + "-X");
2734                continue;
2735            }
2736            Enumeration targets = Transliterator.getAvailableTargets(source);
2737            while(targets.hasMoreElements()) {
2738                String target = (String) targets.nextElement();
2739                Enumeration variants = Transliterator.getAvailableVariants(source, target);
2740                while(variants.hasMoreElements()) {
2741                    String variant = (String) variants.nextElement();
2742                    String id = source + "-" + target + "/" + variant;
2743                    logln("id: " + id);
2744
2745                    String filter = getTranslitTestFilter();
2746                    if (filter != null && id.indexOf(filter) < 0) continue;
2747
2748                    Transliterator t = Transliterator.getInstance(id);
2749                    CheckIncrementalAux(t, test);
2750
2751                    String rev = t.transliterate(test);
2752                    Transliterator inv = t.getInverse();
2753                    CheckIncrementalAux(inv, rev);
2754                }
2755            }
2756        }
2757    }
2758
2759    public String findMatch (String source, String[][] pairs) {
2760        for (int i = 0; i < pairs.length; ++i) {
2761            if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1];
2762        }
2763        return null;
2764    }
2765
2766    public void CheckIncrementalAux(Transliterator t, String input) {
2767
2768        Replaceable test = new ReplaceableString(input);
2769        Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length());
2770        t.transliterate(test, pos);
2771        boolean gotError = false;
2772
2773        // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
2774
2775        if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) {
2776            errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2777            gotError = true;
2778        } else {
2779            logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos));
2780        }
2781        t.finishTransliteration(test, pos);
2782        if (pos.start != pos.limit) {
2783            errln("Incomplete, " + t.getID() + ":  " + UtilityExtensions.formatInput(test, pos));
2784            gotError = true;
2785        }
2786        if(!gotError){
2787            //errln("FAIL: Did not get expected error");
2788        }
2789    }
2790
2791    public void TestFunction() {
2792        // Careful with spacing and ';' here:  Phrase this exactly
2793        // as toRules() is going to return it.  If toRules() changes
2794        // with regard to spacing or ';', then adjust this string.
2795        String rule =
2796            "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
2797
2798        Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2799        if (t == null) {
2800            errln("FAIL: createFromRules failed");
2801            return;
2802        }
2803
2804        String r = t.toRules(true);
2805        if (r.equals(rule)) {
2806            logln("OK: toRules() => " + r);
2807        } else {
2808            errln("FAIL: toRules() => " + r +
2809                    ", expected " + rule);
2810        }
2811
2812        expect(t, "The Quick Brown Fox",
2813        "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
2814        rule =
2815            "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
2816
2817        t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2818        if (t == null) {
2819            errln("FAIL: createFromRules failed");
2820            return;
2821        }
2822
2823        r = t.toRules(true);
2824        if (r.equals(rule)) {
2825            logln("OK: toRules() => " + r);
2826        } else {
2827            errln("FAIL: toRules() => " + r +
2828                    ", expected " + rule);
2829        }
2830
2831        expect(t, "\u0301",
2832        "U+0301 \\N{COMBINING ACUTE ACCENT}");
2833    }
2834
2835    public void TestInvalidBackRef() {
2836        String rule =  ". > $1;";
2837        String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
2838        try {
2839            Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2840            if (t != null) {
2841                errln("FAIL: createFromRules should have returned NULL");
2842            }
2843            errln("FAIL: Ok: . > $1; => no error");
2844            Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD);
2845            if (t2 != null) {
2846                errln("FAIL: createFromRules should have returned NULL");
2847            }
2848            errln("FAIL: Ok: . > $1; => no error");
2849        } catch (IllegalArgumentException e) {
2850            logln("Ok: . > $1; => " + e.getMessage());
2851        }
2852    }
2853
2854    public void TestMulticharStringSet() {
2855        // Basic testing
2856        String rule =
2857            "       [{aa}]       > x;" +
2858            "         a          > y;" +
2859            "       [b{bc}]      > z;" +
2860            "[{gd}] { e          > q;" +
2861            "         e } [{fg}] > r;" ;
2862
2863        Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2864        if (t == null) {
2865            errln("FAIL: createFromRules failed");
2866            return;
2867        }
2868
2869        expect(t, "a aa ab bc d gd de gde gdefg ddefg",
2870        "y x yz z d gd de gdq gdqfg ddrfg");
2871
2872        // Overlapped string test.  Make sure that when multiple
2873        // strings can match that the longest one is matched.
2874        rule =
2875            "    [a {ab} {abc}]    > x;" +
2876            "           b          > y;" +
2877            "           c          > z;" +
2878            " q [t {st} {rst}] { e > p;" ;
2879
2880        t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD);
2881        if (t == null) {
2882            errln("FAIL: createFromRules failed");
2883            return;
2884        }
2885
2886        expect(t, "a ab abc qte qste qrste",
2887        "x x x qtp qstp qrstp");
2888    }
2889
2890    /**
2891     * Test that user-registered transliterators can be used under function
2892     * syntax.
2893     */
2894    public void TestUserFunction() {
2895        Transliterator t;
2896
2897        // There's no need to register inverses if we don't use them
2898        TestUserFunctionFactory.add("Any-gif",
2899                Transliterator.createFromRules("gif",
2900                        "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
2901                        Transliterator.FORWARD));
2902        //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
2903
2904        TestUserFunctionFactory.add("Any-RemoveCurly",
2905                Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD));
2906        //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
2907
2908        logln("Trying &hex");
2909        t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD);
2910        logln("Registering");
2911        TestUserFunctionFactory.add("Any-hex2", t);
2912        t = Transliterator.getInstance("Any-hex2");
2913        expect(t, "abc", "\\u0061\\u0062\\u0063");
2914
2915        logln("Trying &gif");
2916        t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
2917        logln("Registering");
2918        TestUserFunctionFactory.add("Any-gif2", t);
2919        t = Transliterator.getInstance("Any-gif2");
2920        expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" +
2921        "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
2922
2923        // Test that filters are allowed after &
2924        t = Transliterator.createFromRules("test",
2925                "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD);
2926        expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
2927
2928        // Unregister our test stuff
2929        TestUserFunctionFactory.unregister();
2930    }
2931
2932    static class TestUserFunctionFactory implements Transliterator.Factory {
2933        static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
2934        static HashMap m = new HashMap();
2935
2936        static void add(String ID, Transliterator t) {
2937            m.put(new CaseInsensitiveString(ID), t);
2938            Transliterator.registerFactory(ID, singleton);
2939        }
2940
2941        public Transliterator getInstance(String ID) {
2942            return (Transliterator) m.get(new CaseInsensitiveString(ID));
2943        }
2944
2945        static void unregister() {
2946            Iterator ids = m.keySet().iterator();
2947            while (ids.hasNext()) {
2948                CaseInsensitiveString id = (CaseInsensitiveString) ids.next();
2949                Transliterator.unregister(id.getString());
2950                ids.remove(); // removes pair from m
2951            }
2952        }
2953    }
2954
2955    /**
2956     * Test the Any-X transliterators.
2957     */
2958    public void TestAnyX() {
2959        Transliterator anyLatin =
2960            Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
2961
2962        expect(anyLatin,
2963                "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
2964        "greek:abkABK hiragana:abuku cyrillic:abc");
2965    }
2966
2967    /**
2968     * Test Any-X transliterators with sample letters from all scripts.
2969     */
2970    public void TestAny() {
2971        UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze();
2972        StringBuffer testString = new StringBuffer();
2973        for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
2974            UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic);
2975            int count = 5;
2976            for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) {
2977                testString.append(it.getString());
2978                if (--count < 0) break;
2979            }
2980        }
2981        logln("Sample set for Any-Latin: " + testString);
2982        Transliterator anyLatin = Transliterator.getInstance("any-Latn");
2983        String result = anyLatin.transliterate(testString.toString());
2984        logln("Sample result for Any-Latin: " + result);
2985    }
2986
2987
2988    /**
2989     * Test the source and target set API.  These are only implemented
2990     * for RBT and CompoundTransliterator at this time.
2991     */
2992    public void TestSourceTargetSet() {
2993        // Rules
2994        String r =
2995            "a > b; " +
2996            "r [x{lu}] > q;";
2997
2998        // Expected source
2999        UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
3000
3001        // Expected target
3002        UnicodeSet expTrg = new UnicodeSet("[bq]");
3003
3004        Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3005        UnicodeSet src = t.getSourceSet();
3006        UnicodeSet trg = t.getTargetSet();
3007
3008        if (src.equals(expSrc) && trg.equals(expTrg)) {
3009            logln("Ok: " + r + " => source = " + src.toPattern(true) +
3010                    ", target = " + trg.toPattern(true));
3011        } else {
3012            errln("FAIL: " + r + " => source = " + src.toPattern(true) +
3013                    ", expected " + expSrc.toPattern(true) +
3014                    "; target = " + trg.toPattern(true) +
3015                    ", expected " + expTrg.toPattern(true));
3016        }
3017    }
3018
3019    public void TestSourceTargetSet2() {
3020
3021
3022        Normalizer2 nfc = Normalizer2.getNFCInstance();
3023        Normalizer2 nfd = Normalizer2.getNFDInstance();
3024
3025        //        Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
3026        //        UnicodeSet nfkdSource = new UnicodeSet();
3027        //        UnicodeSet nfkdTarget = new UnicodeSet();
3028        //        for (int i = 0; i <= 0x10FFFF; ++i) {
3029        //            if (nfkd.isInert(i)) {
3030        //                continue;
3031        //            }
3032        //            nfkdSource.add(i);
3033        //            String t = nfkd.getDecomposition(i);
3034        //            if (t != null) {
3035        //                nfkdTarget.addAll(t);
3036        //            } else {
3037        //                nfkdTarget.add(i);
3038        //            }
3039        //        }
3040        //        nfkdSource.freeze();
3041        //        nfkdTarget.freeze();
3042        //        logln("NFKD Source: " + nfkdSource.toPattern(false));
3043        //        logln("NFKD Target: " + nfkdTarget.toPattern(false));
3044
3045        UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
3046        UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
3047        UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
3048        CanonicalIterator can = new CanonicalIterator("");
3049
3050        UnicodeSet disorderedMarks = new UnicodeSet();
3051
3052        for (int i = 0; i <= 0x10FFFF; ++i) {
3053            String s = nfd.getDecomposition(i);
3054            if (s == null) {
3055                continue;
3056            }
3057
3058            can.setSource(s);
3059            for (String t = can.next(); t != null; t = can.next()) {
3060                disorderedMarks.add(t);
3061            }
3062
3063            // if s has two code points, (or more), add the lead/trail information
3064            int first = s.codePointAt(0);
3065            int firstCount = Character.charCount(first);
3066            if (s.length() == firstCount) continue;
3067            String trailString = s.substring(firstCount);
3068
3069            // add all the trail characters
3070            if (!nonStarters.containsSome(trailString)) {
3071                continue;
3072            }
3073            UnicodeSet trailSet = leadToTrail.get(first);
3074            if (trailSet == null) {
3075                leadToTrail.put(first, trailSet = new UnicodeSet());
3076            }
3077            trailSet.addAll(trailString); // add remaining trails
3078
3079            // add the sources
3080            UnicodeSet sourcesSet = leadToSources.get(first);
3081            if (sourcesSet == null) {
3082                leadToSources.put(first, sourcesSet = new UnicodeSet());
3083            }
3084            sourcesSet.add(i);
3085        }
3086
3087
3088        for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
3089            String lead = x.getKey();
3090            UnicodeSet sources = x.getValue();
3091            UnicodeSet trailSet = leadToTrail.get(lead);
3092            for (String source : sources) {
3093                for (String trail : trailSet) {
3094                    can.setSource(source + trail);
3095                    for (String t = can.next(); t != null; t = can.next()) {
3096                        if (t.endsWith(trail)) continue;
3097                        disorderedMarks.add(t);
3098                    }
3099                }
3100            }
3101        }
3102
3103
3104        for (String s : nonStarters) {
3105            disorderedMarks.add("\u0345" + s);
3106            disorderedMarks.add(s+"\u0323");
3107            String xx = nfc.normalize("\u01EC" + s);
3108            if (!xx.startsWith("\u01EC")) {
3109                logln("??");
3110            }
3111        }
3112
3113        //        for (int i = 0; i <= 0x10FFFF; ++i) {
3114        //            String s = nfkd.getDecomposition(i);
3115        //            if (s != null) {
3116        //                disorderedMarks.add(s);
3117        //                disorderedMarks.add(nfc.normalize(s));
3118        //                addDerivedStrings(nfc, disorderedMarks, s);
3119        //            }
3120        //            s = nfd.getDecomposition(i);
3121        //            if (s != null) {
3122        //                disorderedMarks.add(s);
3123        //            }
3124        //            if (!nfc.isInert(i)) {
3125        //                if (i == 0x00C0) {
3126        //                    logln("\u00C0");
3127        //                }
3128        //                can.setSource(s+"\u0334");
3129        //                for (String t = can.next(); t != null; t = can.next()) {
3130        //                    addDerivedStrings(nfc, disorderedMarks, t);
3131        //                }
3132        //                can.setSource(s+"\u0345");
3133        //                for (String t = can.next(); t != null; t = can.next()) {
3134        //                    addDerivedStrings(nfc, disorderedMarks, t);
3135        //                }
3136        //                can.setSource(s+"\u0323");
3137        //                for (String t = can.next(); t != null; t = can.next()) {
3138        //                    addDerivedStrings(nfc, disorderedMarks, t);
3139        //                }
3140        //            }
3141        //        }
3142        logln("Test cases: " + disorderedMarks.size());
3143        disorderedMarks.addAll(0,0x10FFFF).freeze();
3144        logln("isInert \u0104 " + nfc.isInert('\u0104'));
3145
3146        Object[][] rules = {
3147                {":: [:sc=COMMON:] any-name;", null},
3148
3149                {":: [:Greek:] hex-any/C;", null},
3150                {":: [:Greek:] any-hex/C;", null},
3151
3152                {":: [[:Mn:][:Me:]] remove;", null},
3153                {":: [[:Mn:][:Me:]] null;", null},
3154
3155
3156                {":: lower;", null},
3157                {":: upper;", null},
3158                {":: title;", null},
3159                {":: CaseFold;", null},
3160
3161                {":: NFD;", null},
3162                {":: NFC;", null},
3163                {":: NFKD;", null},
3164                {":: NFKC;", null},
3165
3166                {":: [[:Mn:][:Me:]] NFKD;", null},
3167                {":: Latin-Greek;", null},
3168                {":: [:Latin:] NFKD;", null},
3169                {":: NFKD;", null},
3170                {":: NFKD;\n" +
3171                    ":: [[:Mn:][:Me:]] remove;\n" +
3172                    ":: NFC;", null},
3173        };
3174        for (Object[] rulex : rules) {
3175            String rule = (String) rulex[0];
3176            Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
3177            UnicodeSet actualSource = trans.getSourceSet();
3178            UnicodeSet actualTarget = trans.getTargetSet();
3179            UnicodeSet empiricalSource = new UnicodeSet();
3180            UnicodeSet empiricalTarget = new UnicodeSet();
3181            String ruleDisplay = rule.replace("\n", "\t\t");
3182            UnicodeSet toTest = disorderedMarks;
3183            //            if (rulex[1] != null) {
3184            //                toTest = new UnicodeSet(disorderedMarks);
3185            //                toTest.addAll((UnicodeSet) rulex[1]);
3186            //            }
3187
3188            String test = nfd.normalize("\u0104");
3189            boolean DEBUG = true;
3190            @SuppressWarnings("unused")
3191            int count = 0; // for debugging
3192            for (String s : toTest) {
3193                if (s.equals(test)) {
3194                    logln(test);
3195                }
3196                String t = trans.transform(s);
3197                if (!s.equals(t)) {
3198                    if (!isAtomic(s, t, trans)) {
3199                        isAtomic(s, t, trans);
3200                        continue;
3201                    }
3202
3203                    // only keep the part that changed; so skip the front and end.
3204                    //                    int start = findSharedStartLength(s,t);
3205                    //                    int end = findSharedEndLength(s,t);
3206                    //                    if (start != 0 || end != 0) {
3207                    //                        s = s.substring(start, s.length() - end);
3208                    //                        t = t.substring(start, t.length() - end);
3209                    //                    }
3210                    if (DEBUG) {
3211                        if (!actualSource.containsAll(s)) {
3212                            count++;
3213                        }
3214                        if (!actualTarget.containsAll(t)) {
3215                            count++;
3216                        }
3217                    }
3218                    addSourceTarget(s, empiricalSource, t, empiricalTarget);
3219                }
3220            }
3221            assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
3222            assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
3223        }
3224    }
3225
3226    public void TestSourceTargetSetFilter() {
3227        String[][] tests = {
3228                // rules, expectedTarget-FORWARD, expectedTarget-REVERSE
3229                {"[] Latin-Greek", null, "[\']"},
3230                {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"},
3231                {"[] Any-Latin"},
3232                {"[] casefold"},
3233                {"[] NFKD;"},
3234                {"[] NFKC;"},
3235                {"[] hex"},
3236                {"[] lower"},
3237                {"[] null"},
3238                {"[] remove"},
3239                {"[] title"},
3240                {"[] upper"},
3241        };
3242        UnicodeSet expectedSource = UnicodeSet.EMPTY;
3243        for (String[] testPair : tests) {
3244            String test = testPair[0];
3245            Transliterator t0;
3246            try {
3247                t0 = Transliterator.getInstance(test);
3248            } catch (Exception e) {
3249                t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD);
3250            }
3251            Transliterator t1;
3252            try {
3253                t1 = t0.getInverse();
3254            } catch (Exception e) {
3255                t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE);
3256            }
3257            int targetIndex = 0;
3258            for (Transliterator t : new Transliterator[]{t0, t1}) {
3259                boolean ok;
3260                UnicodeSet source = t.getSourceSet();
3261                String direction = t == t0 ? "FORWARD\t" : "REVERSE\t";
3262                targetIndex++;
3263                UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource
3264                        : testPair[targetIndex] == null ? expectedSource
3265                                : testPair[targetIndex].length() == 0 ? expectedSource
3266                                        : new UnicodeSet(testPair[targetIndex]);
3267                ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source);
3268                if (!ok) { // for debugging
3269                    source = t.getSourceSet();
3270                }
3271                UnicodeSet target = t.getTargetSet();
3272                ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target);
3273                if (!ok) { // for debugging
3274                    target = t.getTargetSet();
3275                }
3276            }
3277        }
3278    }
3279
3280    private boolean isAtomic(String s, String t, Transliterator trans) {
3281        for (int i = 1; i < s.length(); ++i) {
3282            if (!CharSequences.onCharacterBoundary(s, i)) {
3283                continue;
3284            }
3285            String q = trans.transform(s.substring(0,i));
3286            if (t.startsWith(q)) {
3287                String r = trans.transform(s.substring(i));
3288                if (t.length() == q.length() + r.length() && t.endsWith(r)) {
3289                    return false;
3290                }
3291            }
3292        }
3293        return true;
3294        //        // make sure that every part is different
3295        //        if (s.codePointCount(0, s.length()) > 1) {
3296        //            int[] codePoints = It.codePoints(s);
3297        //            for (int k = 0; k < codePoints.length; ++k) {
3298        //                int pos = indexOf(t,codePoints[k]);
3299        //                if (pos >= 0) {
3300        //                    int x;
3301        //                }
3302        //            }
3303        //            if (s.contains("\u00C0")) {
3304        //                logln("\u00C0");
3305        //            }
3306        //        }
3307    }
3308
3309    private void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) {
3310        expectedSource.addAll(s);
3311        if (t.length() > 0) {
3312            expectedTarget.addAll(t);
3313        }
3314    }
3315
3316//    private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) {
3317//        disorderedMarks.add(s);
3318//        for (int j = 1; j < s.length(); ++j) {
3319//            if (CharSequences.onCharacterBoundary(s, j)) {
3320//                String shorter = s.substring(0,j);
3321//                disorderedMarks.add(shorter);
3322//                disorderedMarks.add(nfc.normalize(shorter) + s.substring(j));
3323//            }
3324//        }
3325//    }
3326
3327    public void TestCharUtils() {
3328        String[][] startTests = {
3329                {"1", "a", "ab"},
3330                {"0", "a", "xb"},
3331                {"0", "\uD800", "\uD800\uDC01"},
3332                {"1", "\uD800a", "\uD800b"},
3333                {"0", "\uD800\uDC00", "\uD800\uDC01"},
3334        };
3335        for (String[] row : startTests) {
3336            int actual = findSharedStartLength(row[1], row[2]);
3337            assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")",
3338                    Integer.parseInt(row[0]),
3339                    actual);
3340        }
3341        String[][] endTests = {
3342                {"0", "\uDC00", "\uD801\uDC00"},
3343                {"1", "a", "ba"},
3344                {"0", "a", "bx"},
3345                {"1", "a\uDC00", "b\uDC00"},
3346                {"0", "\uD800\uDC00", "\uD801\uDC00"},
3347        };
3348        for (String[] row : endTests) {
3349            int actual = findSharedEndLength(row[1], row[2]);
3350            assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")",
3351                    Integer.parseInt(row[0]),
3352                    actual);
3353        }
3354    }
3355
3356    /**
3357     * @param s
3358     * @param t
3359     * @return
3360     */
3361    // TODO make generally available
3362    private static int findSharedStartLength(CharSequence s, CharSequence t) {
3363        int min = Math.min(s.length(), t.length());
3364        int i;
3365        char sch, tch;
3366        for (i = 0; i < min; ++i) {
3367            sch = s.charAt(i);
3368            tch = t.charAt(i);
3369            if (sch != tch) {
3370                break;
3371            }
3372        }
3373        return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1;
3374    }
3375
3376    /**
3377     * @param s
3378     * @param t
3379     * @return
3380     */
3381    // TODO make generally available
3382    private static int findSharedEndLength(CharSequence s, CharSequence t) {
3383        int slength = s.length();
3384        int tlength = t.length();
3385        int min = Math.min(slength, tlength);
3386        int i;
3387        char sch, tch;
3388        // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho'
3389        for (i = 0; i < min; ++i) {
3390            sch = s.charAt(slength - i - 1);
3391            tch = t.charAt(tlength - i - 1);
3392            if (sch != tch) {
3393                break;
3394            }
3395        }
3396        return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1;
3397    }
3398
    // Comparison modes accepted by assertEquals(String, UnicodeSet, UnicodeSet, SetAssert).
    // NOTE(review): that assertEquals overload takes a SetAssert but never reads it,
    // so all three constants currently behave the same there -- confirm this is intended.
    enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK}
3400
3401    void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) {
3402        boolean haveError = false;
3403        if (!actual.containsAll(empirical)) {
3404            UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual);
3405            errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing));
3406            haveError = true;
3407        }
3408        if (!empirical.containsAll(actual)) {
3409            UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical);
3410            logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra));
3411            haveError = true;
3412        }
3413        if (!haveError) {
3414            logln("OK " + message + ' ' + toPattern(empirical));
3415        }
3416    }
3417
3418    private String toPattern(UnicodeSet missing) {
3419        String result = missing.toPattern(false);
3420        if (result.length() < 200) {
3421            return result;
3422        }
3423        return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026";
3424    }
3425
3426
3427    /**
3428     * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3429     */
3430    public void TestPatternWhitespace() {
3431        // Rules
3432        String r = "a > \u200E b;";
3433
3434        Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD);
3435
3436        expect(t, "a", "b");
3437
3438        // UnicodeSet
3439        UnicodeSet set = new UnicodeSet("[a \u200E]");
3440
3441        if (set.contains(0x200E)) {
3442            errln("FAIL: U+200E not being ignored by UnicodeSet");
3443        }
3444    }
3445
3446    public void TestAlternateSyntax() {
3447        // U+2206 == &
3448        // U+2190 == <
3449        // U+2192 == >
3450        // U+2194 == <>
3451        expect("a \u2192 x; b \u2190 y; c \u2194 z",
3452                "abc",
3453        "xbz");
3454        expect("([:^ASCII:]) \u2192 \u2206Name($1);",
3455                "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
3456        "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
3457    }
3458
3459    public void TestPositionAPI() {
3460        Transliterator.Position a = new Transliterator.Position(3,5,7,11);
3461        Transliterator.Position b = new Transliterator.Position(a);
3462        Transliterator.Position c = new Transliterator.Position();
3463        c.set(a);
3464        // Call the toString() API:
3465        if (a.equals(b) && a.equals(c)) {
3466            logln("Ok: " + a + " == " + b + " == " + c);
3467        } else {
3468            errln("FAIL: " + a + " != " + b + " != " + c);
3469        }
3470    }
3471
3472    //======================================================================
3473    // New tests for the ::BEGIN/::END syntax
3474    //======================================================================
3475
    // Rule sets referenced by index from BEGIN_END_TEST_CASES, TestBeginEnd() and
    // TestBeginEndToRules(). Entries whose ::BEGIN/::END form is commented out are
    // kept as empty strings so the indexes of the remaining entries do not shift.
    private static final String[] BEGIN_END_RULES = new String[] {
        // [0]
        "abc > xy;"
        + "aba > z;",

        // [1]
        /*
        "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba > z;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [2]
        /*
        "abc > xy;"
        + "::BEGIN;"
        + "aba > z;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [3]
        /*
        "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "aba > z;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [4]
        "abc > xy;"
        + "::Null;"
        + "aba > z;",

        // [5]
        "::Upper;"
        + "ABC > xy;"
        + "AB > x;"
        + "C > z;"
        + "::Upper;"
        + "XYZ > p;"
        + "XY > q;"
        + "Z > r;"
        + "::Upper;",

        // [6]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [7]
        "::Null;"
        + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [8]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::Null;",

        // [9]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::Null;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",

        // [10]
        /*
        "::BEGIN;"
        + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::END;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [11]
        /*
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "::BEGIN;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [12]
        /*
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ab = [ab];"
        + "::BEGIN;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::END;"
        + "::BEGIN;"
        + "$ab { ' ' } $ab > '-';"
        + "c { ' ' > ;"
        + "::END;"
        + "::BEGIN;"
        + "'a-a' > a\\%|a;"
        + "::END;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [13]
        "$ws = [[:Separator:][\\u0009-\\u000C]$];"
        + "$delim = [\\-$ws];"
        + "$ab = [ab];"
        + "::Null;"
        + "$ws $delim* > ' ';"
        + "'-' $delim* > '-';"
        + "::Null;"
        + "$ab { ' ' } $ab > '-';"
        + "c { ' ' > ;"
        + "::Null;"
        + "'a-a' > a\\%|a;",

        // [14]
        /*
        "::[abc];"
        + "::BEGIN;"
        + "abc > xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba > yz;"
        + "::END;"
        + "::Upper;",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [15]
        "::[abc];"
        + "abc > xy;"
        + "::Null;"
        + "aba > yz;"
        + "::Upper;",

        // [16]
        /*
        "::[abc];"
        + "::BEGIN;"
        + "abc <> xy;"
        + "::END;"
        + "::BEGIN;"
        + "aba <> yz;"
        + "::END;"
        + "::Upper(Lower);"
        + "::([XYZ]);",
         */
        "", // test case commented out below, this is here to keep from messing up the indexes

        // [17]
        "::[abc];"
        + "abc <> xy;"
        + "::Null;"
        + "aba <> yz;"
        + "::Upper(Lower);"
        + "::([XYZ]);"
    };
3649
3650    /*
3651(This entire test is commented out below and will need some heavy revision when we re-add
3652the ::BEGIN/::END stuff)
3653    private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
3654        // [7]
3655        "::BEGIN;"
3656        + "abc > xy;"
3657        + "::BEGIN;"
3658        + "aba > z;"
3659        + "::END;"
3660        + "::END;",
3661
3662        // [8]
3663        "abc > xy;"
3664        + " aba > z;"
3665        + "::END;",
3666
3667        // [9]
3668        "::BEGIN;"
3669        + "::Upper;"
3670        + "::END;"
3671    };
3672     */
3673
    // Flat table read three entries at a time by TestBeginEnd() and
    // TestBeginEndToRules(): a rule set from BEGIN_END_RULES, the input text, and
    // the expected output. Rows for the disabled ::BEGIN/::END rule sets are
    // commented out.
    private static final String[] BEGIN_END_TEST_CASES = new String[] {
        BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
        //        BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
        //        BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
        //        BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
        BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
        BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",

        BEGIN_END_RULES[6], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[7], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[8], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[9], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[10], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[11], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[12], "e   e - e---e-  e", "e e e-e-e",
        //        BEGIN_END_RULES[12], "a    a    a    a", "a%a%a%a",
        //        BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
        BEGIN_END_RULES[13], "e   e - e---e-  e", "e e e-e-e",
        BEGIN_END_RULES[13], "a    a    a    a", "a%a%a%a",
        BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",

        //        BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        //        BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
        BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
    };
3700
3701    public void TestBeginEnd() {
3702        // run through the list of test cases above
3703        for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3704            expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3705        }
3706
3707        // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
3708        Transliterator reversed  = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3709                Transliterator.REVERSE);
3710        expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3711
3712        // finally, run through the list of syntactically-ill-formed rule sets above and make sure
3713        // that all of them cause errors
3714        /*
3715(commented out until we have the real ::BEGIN/::END stuff in place
3716        for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
3717            try {
3718                Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
3719                        Transliterator.FORWARD);
3720                errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
3721            }
3722            catch (IllegalArgumentException e) {
3723                // this is supposed to happen; do nothing here
3724            }
3725        }
3726         */
3727    }
3728
3729    public void TestBeginEndToRules() {
3730        // run through the same list of test cases we used above, but this time, instead of just
3731        // instantiating a Transliterator from the rules and running the test against it, we instantiate
3732        // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
3733        // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
3734        // to (i.e., does the same thing as) the original rule set
3735        for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3736            Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i],
3737                    Transliterator.FORWARD);
3738            String rules = t.toRules(false);
3739            Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD);
3740            expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]);
3741        }
3742
3743        // do the same thing for the reversible test case
3744        Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17],
3745                Transliterator.REVERSE);
3746        String rules = reversed.toRules(false);
3747        Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD);
3748        expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3749    }
3750
3751    public void TestRegisterAlias() {
3752        String longID = "Lower;[aeiou]Upper";
3753        String shortID = "Any-CapVowels";
3754        String reallyShortID = "CapVowels";
3755
3756        Transliterator.registerAlias(shortID, longID);
3757
3758        Transliterator t1 = Transliterator.getInstance(longID);
3759        Transliterator t2 = Transliterator.getInstance(reallyShortID);
3760
3761        if (!t1.getID().equals(longID))
3762            errln("Transliterator instantiated with long ID doesn't have long ID");
3763        if (!t2.getID().equals(reallyShortID))
3764            errln("Transliterator instantiated with short ID doesn't have short ID");
3765
3766        if (!t1.toRules(true).equals(t2.toRules(true)))
3767            errln("Alias transliterators aren't the same");
3768
3769        Transliterator.unregister(shortID);
3770
3771        try {
3772            t1 = Transliterator.getInstance(shortID);
3773            errln("Instantiation with short ID succeeded after short ID was unregistered");
3774        }
3775        catch (IllegalArgumentException e) {
3776        }
3777
3778        // try the same thing again, but this time with something other than
3779        // an instance of CompoundTransliterator
3780        String realID = "Latin-Greek";
3781        String fakeID = "Latin-dlgkjdflkjdl";
3782        Transliterator.registerAlias(fakeID, realID);
3783
3784        t1 = Transliterator.getInstance(realID);
3785        t2 = Transliterator.getInstance(fakeID);
3786
3787        if (!t1.toRules(true).equals(t2.toRules(true)))
3788            errln("Alias transliterators aren't the same");
3789
3790        Transliterator.unregister(fakeID);
3791    }
3792
3793    /**
3794     * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
3795     */
3796    public void TestHalfwidthFullwidth() {
3797        Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth");
3798        Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth");
3799
3800        // Array of 3n items
3801        // Each item is
3802        //   "hf"|"fh"|"both",
3803        //   <Halfwidth>,
3804        //   <Fullwidth>
3805        String[] DATA = {
3806                "both",
3807                "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020",
3808                "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000",
3809        };
3810
3811        for (int i=0; i<DATA.length; i+=3) {
3812            switch (DATA[i].charAt(0)) {
3813            case 'h': // Halfwidth-Fullwidth only
3814                expect(hf, DATA[i+1], DATA[i+2]);
3815                break;
3816            case 'f': // Fullwidth-Halfwidth only
3817                expect(fh, DATA[i+2], DATA[i+1]);
3818                break;
3819            case 'b': // both directions
3820                expect(hf, DATA[i+1], DATA[i+2]);
3821                expect(fh, DATA[i+2], DATA[i+1]);
3822                break;
3823            }
3824        }
3825
3826    }
3827
3828    /**
3829     *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
3830     *              TODO: confirm that the expected results are correct.
3831     *              For now, test just confirms that C++ and Java give identical results.
3832     */
3833    public void TestThai() {
3834        Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD);
3835        String thaiText =
3836            "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" +
3837            "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" +
3838            "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" +
3839            "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" +
3840            "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" +
3841            "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" +
3842            "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" +
3843            "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" +
3844            "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" +
3845            "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" +
3846            "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" +
3847            "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" +
3848            "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" +
3849            "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" +
3850            "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" +
3851            "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" +
3852            "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" +
3853            "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" +
3854            "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" +
3855            "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" +
3856            "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" +
3857            "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" +
3858            "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" +
3859            "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" +
3860            " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" +
3861            "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" +
3862            "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" +
3863            " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" +
3864            "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" +
3865            "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b.";
3866
3867        String latinText =
3868            "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" +
3869            "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" +
3870            "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" +
3871            "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" +
3872            "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" +
3873            " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " +
3874            "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" +
3875            "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" +
3876            "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" +
3877            "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" +
3878            "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" +
3879            "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" +
3880            " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" +
3881            "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" +
3882            " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" +
3883            "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" +
3884            "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" +
3885            "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb.";
3886
3887        expect(tr, thaiText, latinText);
3888    }
3889
3890
3891    //======================================================================
3892    // These tests are not mirrored (yet) in icu4c at
3893    // source/test/intltest/transtst.cpp
3894    //======================================================================
3895
3896    /**
3897     * Improve code coverage.
3898     */
3899    public void TestCoverage() {
3900        // NullTransliterator
3901        Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD);
3902        expect(t, "a", "a");
3903
3904        // Source, target set
3905        t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD);
3906        t.setFilter(new UnicodeSet("[A-Z]"));
3907        logln("source = " + t.getSourceSet());
3908        logln("target = " + t.getTargetSet());
3909
3910        t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD);
3911        logln("source = " + t.getSourceSet());
3912        logln("target = " + t.getTargetSet());
3913    }
3914    /*
3915     * Test case for threading problem in NormalizationTransliterator
3916     * reported by ticket#5160
3917     */
3918    public void TestT5160() {
3919        final String[] testData = {
3920                "a",
3921                "b",
3922                "\u09BE",
3923                "A\u0301",
3924        };
3925        final String[] expected = {
3926                "a",
3927                "b",
3928                "\u09BE",
3929                "\u00C1",
3930        };
3931        Transliterator translit = Transliterator.getInstance("NFC");
3932        NormTranslitTask[] tasks = new NormTranslitTask[testData.length];
3933        for (int i = 0; i < tasks.length; i++) {
3934            tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]);
3935        }
3936        TestUtil.runUntilDone(tasks);
3937
3938        for (int i = 0; i < tasks.length; i++) {
3939            if (tasks[i].getErrorMessage() != null) {
3940                System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage());
3941                break;
3942            }
3943        }
3944    }
3945
3946    static class NormTranslitTask implements Runnable {
3947        Transliterator translit;
3948        String testData;
3949        String expectedData;
3950        String errorMsg;
3951
3952        NormTranslitTask(Transliterator translit, String testData, String expectedData) {
3953            this.translit = translit;
3954            this.testData = testData;
3955            this.expectedData = expectedData;
3956        }
3957
3958        public void run() {
3959            errorMsg = null;
3960            StringBuffer inBuf = new StringBuffer(testData);
3961            StringBuffer expectedBuf = new StringBuffer(expectedData);
3962
3963            for(int i = 0; i < 1000; i++) {
3964                String in = inBuf.toString();
3965                String out = translit.transliterate(in);
3966                String expected = expectedBuf.toString();
3967                if (!out.equals(expected)) {
3968                    errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}";
3969                    break;
3970                }
3971                inBuf.append(testData);
3972                expectedBuf.append(expectedData);
3973            }
3974        }
3975
3976        public String getErrorMessage() {
3977            return errorMsg;
3978        }
3979    }
3980
3981    //======================================================================
3982    // Support methods
3983    //======================================================================
3984    void expect(String rules,
3985            String source,
3986            String expectedResult,
3987            Transliterator.Position pos) {
3988        Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD);
3989        expect(t, source, expectedResult, pos);
3990    }
3991
3992    void expect(String rules, String source, String expectedResult) {
3993        expect(rules, source, expectedResult, null);
3994    }
3995
3996    void expect(Transliterator t, String source, String expectedResult,
3997            Transliterator reverseTransliterator) {
3998        expect(t, source, expectedResult);
3999        if (reverseTransliterator != null) {
4000            expect(reverseTransliterator, expectedResult, source);
4001        }
4002    }
4003
4004    void expect(Transliterator t, String source, String expectedResult) {
4005        expect(t, source, expectedResult, (Transliterator.Position) null);
4006    }
4007
4008    void expect(Transliterator t, String source, String expectedResult,
4009            Transliterator.Position pos) {
4010        if (pos == null) {
4011            String result = t.transliterate(source);
4012            if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return;
4013        }
4014
4015        Transliterator.Position index = null;
4016        if (pos == null) {
4017            index = new Transliterator.Position(0, source.length(), 0, source.length());
4018        } else {
4019            index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
4020                    pos.start, pos.limit);
4021        }
4022
4023        ReplaceableString rsource = new ReplaceableString(source);
4024
4025        t.finishTransliteration(rsource, index);
4026        // Do it all at once -- below we do it incrementally
4027
4028        if (index.start != index.limit) {
4029            expectAux(t.getID() + ":UNFINISHED", source,
4030                    "start: " + index.start + ", limit: " + index.limit, false, expectedResult);
4031            return;
4032        }
4033        String result = rsource.toString();
4034        if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return;
4035
4036
4037        if (pos == null) {
4038            index = new Transliterator.Position();
4039        } else {
4040            index = new Transliterator.Position(pos.contextStart, pos.contextLimit,
4041                    pos.start, pos.limit);
4042        }
4043
4044        // Test incremental transliteration -- this result
4045        // must be the same after we finalize (see below).
4046        List<String> v = new ArrayList<String>();
4047        v.add(source);
4048        rsource.replace(0, rsource.length(), "");
4049        if (pos != null) {
4050            rsource.replace(0, 0, source);
4051            v.add(UtilityExtensions.formatInput(rsource, index));
4052            t.transliterate(rsource, index);
4053            v.add(UtilityExtensions.formatInput(rsource, index));
4054        } else {
4055            for (int i=0; i<source.length(); ++i) {
4056                //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
4057                //log.append(source.charAt(i)).append(" -> "));
4058                t.transliterate(rsource, index, source.charAt(i));
4059                //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
4060                v.add(UtilityExtensions.formatInput(rsource, index) +
4061                        ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>"));
4062            }
4063        }
4064
4065        // As a final step in keyboard transliteration, we must call
4066        // transliterate to finish off any pending partial matches that
4067        // were waiting for more input.
4068        t.finishTransliteration(rsource, index);
4069        result = rsource.toString();
4070        //log.append(" => ").append(rsource.toString());
4071        v.add(result);
4072
4073        String[] results = new String[v.size()];
4074        v.toArray(results);
4075        expectAux(t.getID() + ":Incremental", results,
4076                result.equals(expectedResult),
4077                expectedResult);
4078    }
4079
4080    boolean expectAux(String tag, String source,
4081            String result, String expectedResult) {
4082        return expectAux(tag, new String[] {source, result},
4083                result.equals(expectedResult),
4084                expectedResult);
4085    }
4086
4087    boolean expectAux(String tag, String source,
4088            String result, boolean pass,
4089            String expectedResult) {
4090        return expectAux(tag, new String[] {source, result},
4091                pass,
4092                expectedResult);
4093    }
4094
4095    boolean expectAux(String tag, String source,
4096            boolean pass,
4097            String expectedResult) {
4098        return expectAux(tag, new String[] {source},
4099                pass,
4100                expectedResult);
4101    }
4102
4103    boolean expectAux(String tag, String[] results, boolean pass,
4104            String expectedResult) {
4105        msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true);
4106
4107        for (int i = 0; i < results.length; ++i) {
4108            String label;
4109            if (i == 0) {
4110                label = "source:   ";
4111            } else if (i == results.length - 1) {
4112                label = "result:   ";
4113            } else {
4114                if (!isVerbose() && pass) continue;
4115                label = "interm" + i + ":  ";
4116            }
4117            msg("    " + label + results[i], pass ? LOG : ERR, false, true);
4118        }
4119
4120        if (!pass) {
4121            msg(  "    expected: " + expectedResult, ERR, false, true);
4122        }
4123
4124        return pass;
4125    }
4126
4127    private void assertTransform(String message, String expected, StringTransform t, String source) {
4128        assertEquals(message + " " + source, expected, t.transform(source));
4129    }
4130
4131
4132    private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) {
4133        assertEquals(message + " " +source, expected, t.transform(source));
4134        assertEquals(message + " " +source2, expected, t.transform(source2));
4135        assertEquals(message + " " + expected, source, back.transform(expected));
4136    }
4137
4138    /*
4139     * Tests the method public Enumeration<String> getAvailableTargets(String source)
4140     */
4141    public void TestGetAvailableTargets() {
4142        try {
4143            // Tests when if (targets == null) is true
4144            Transliterator.getAvailableTargets("");
4145        } catch (Exception e) {
4146            errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception.");
4147        }
4148    }
4149
4150    /*
4151     * Tests the method public Enumeration<String> getAvailableVariants(String source, String target)
4152     */
4153    public void TestGetAvailableVariants() {
4154        try {
4155            // Tests when if (targets == null) is true
4156            Transliterator.getAvailableVariants("", "");
4157        } catch (Exception e) {
4158            errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception.");
4159        }
4160    }
4161
4162    /*
4163     * Tests the mehtod String nextLine() in RuleBody
4164     */
4165    public void TestNextLine() {
4166        // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true
4167        try{
4168            Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD);
4169        } catch(Exception e){
4170            errln("TransliteratorParser.nextLine() was not suppose to return an " +
4171            "exception for a rule of '\\'");
4172        }
4173    }
4174}
4175