1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (C) 1999-2016, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7**********************************************************************
8*   Date        Name        Description
9*   11/10/99    aliu        Creation.
10**********************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_TRANSLITERATION
16
17#include "transtst.h"
18#include "unicode/locid.h"
19#include "unicode/dtfmtsym.h"
20#include "unicode/normlzr.h"
21#include "unicode/translit.h"
22#include "unicode/uchar.h"
23#include "unicode/unifilt.h"
24#include "unicode/uniset.h"
25#include "unicode/ustring.h"
26#include "unicode/usetiter.h"
27#include "unicode/uscript.h"
28#include "unicode/utf16.h"
29#include "cpdtrans.h"
30#include "nultrans.h"
31#include "rbt.h"
32#include "rbt_pars.h"
33#include "anytrans.h"
34#include "esctrn.h"
35#include "name2uni.h"
36#include "nortrans.h"
37#include "remtrans.h"
38#include "titletrn.h"
39#include "tolowtrn.h"
40#include "toupptrn.h"
41#include "unesctrn.h"
42#include "uni2name.h"
43#include "cstring.h"
44#include "cmemory.h"
45#include <stdio.h>
46
47/***********************************************************************
48
49                     HOW TO USE THIS TEST FILE
50                               -or-
51                  How I developed on two platforms
52                without losing (too much of) my mind
53
54
551. Add new tests by copying/pasting/changing existing tests.  On Java,
56   any public void method named Test...() taking no parameters becomes
57   a test.  On C++, you need to modify the header and add a line to
58   the runIndexedTest() dispatch method.
59
602. Make liberal use of the expect() method; it is your friend.
61
623. The tests in this file exactly match those in a sister file on the
63   other side.  The two files are:
64
65   icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
66   icu4c:  source/test/intltest/transtst.cpp
67
68                  ==> THIS IS THE IMPORTANT PART <==
69
70   When you add a test in this file, add it in TransliteratorTest.java
71   too.  Give it the same name and put it in the same relative place.
72   This makes maintenance a lot simpler for any poor soul who ends up
73   trying to synchronize the tests between icu4j and icu4c.
74
754. If you MUST enter a test that is NOT paralleled in the sister file,
76   then add it in the special non-mirrored section.  These are
77   labeled
78
79     "icu4j ONLY"
80
81   or
82
83     "icu4c ONLY"
84
85   Make sure you document the reason the test is here and not there.
86
87
88Thank you.
89The Management
90***********************************************************************/
91
92// Define character constants thusly to be EBCDIC-friendly
93enum {
94    LEFT_BRACE=((UChar)0x007B), /*{*/
95    PIPE      =((UChar)0x007C), /*|*/
96    ZERO      =((UChar)0x0030), /*0*/
97    UPPER_A   =((UChar)0x0041)  /*A*/
98};
99
100TransliteratorTest::TransliteratorTest()
101:   DESERET_DEE((UChar32)0x10414),
102    DESERET_dee((UChar32)0x1043C)
103{
104}
105
106TransliteratorTest::~TransliteratorTest() {}
107
/**
 * Test dispatch table: maps a zero-based test index to a test method.
 *
 * Each TESTCASE(n, method) line binds index n to the named method; the
 * TESTCASE macro itself is defined outside this file (presumably it sets
 * `name` and, when `exec` is true, runs the method -- confirm against the
 * harness header).  Any index without an entry falls through to `default`,
 * which sets `name` to the empty string.
 *
 * @param index  zero-based index of the test being queried or run
 * @param exec   passed through to the TESTCASE macro
 * @param name   output; set to "" for an index with no entry
 *
 * When adding a test, append a new TESTCASE line with the next index and
 * declare the method in the header.
 */
void
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
                                   const char* &name, char* /*par*/) {
    switch (index) {
        TESTCASE(0,TestInstantiation);
        TESTCASE(1,TestSimpleRules);
        TESTCASE(2,TestRuleBasedInverse);
        TESTCASE(3,TestKeyboard);
        TESTCASE(4,TestKeyboard2);
        TESTCASE(5,TestKeyboard3);
        TESTCASE(6,TestArabic);
        TESTCASE(7,TestCompoundKana);
        TESTCASE(8,TestCompoundHex);
        TESTCASE(9,TestFiltering);
        TESTCASE(10,TestInlineSet);
        TESTCASE(11,TestPatternQuoting);
        TESTCASE(12,TestJ277);
        TESTCASE(13,TestJ243);
        TESTCASE(14,TestJ329);
        TESTCASE(15,TestSegments);
        TESTCASE(16,TestCursorOffset);
        TESTCASE(17,TestArbitraryVariableValues);
        TESTCASE(18,TestPositionHandling);
        TESTCASE(19,TestHiraganaKatakana);
        TESTCASE(20,TestCopyJ476);
        TESTCASE(21,TestAnchors);
        TESTCASE(22,TestInterIndic);
        TESTCASE(23,TestFilterIDs);
        TESTCASE(24,TestCaseMap);
        TESTCASE(25,TestNameMap);
        TESTCASE(26,TestLiberalizedID);
        TESTCASE(27,TestCreateInstance);
        TESTCASE(28,TestNormalizationTransliterator);
        TESTCASE(29,TestCompoundRBT);
        TESTCASE(30,TestCompoundFilter);
        TESTCASE(31,TestRemove);
        TESTCASE(32,TestToRules);
        TESTCASE(33,TestContext);
        TESTCASE(34,TestSupplemental);
        TESTCASE(35,TestQuantifier);
        TESTCASE(36,TestSTV);
        TESTCASE(37,TestCompoundInverse);
        TESTCASE(38,TestNFDChainRBT);
        TESTCASE(39,TestNullInverse);
        TESTCASE(40,TestAliasInverseID);
        TESTCASE(41,TestCompoundInverseID);
        TESTCASE(42,TestUndefinedVariable);
        TESTCASE(43,TestEmptyContext);
        TESTCASE(44,TestCompoundFilterID);
        TESTCASE(45,TestPropertySet);
        TESTCASE(46,TestNewEngine);
        TESTCASE(47,TestQuantifiedSegment);
        TESTCASE(48,TestDevanagariLatinRT);
        TESTCASE(49,TestTeluguLatinRT);
        TESTCASE(50,TestCompoundLatinRT);
        TESTCASE(51,TestSanskritLatinRT);
        TESTCASE(52,TestLocaleInstantiation);
        TESTCASE(53,TestTitleAccents);
        TESTCASE(54,TestLocaleResource);
        TESTCASE(55,TestParseError);
        TESTCASE(56,TestOutputSet);
        TESTCASE(57,TestVariableRange);
        TESTCASE(58,TestInvalidPostContext);
        TESTCASE(59,TestIDForms);
        TESTCASE(60,TestToRulesMark);
        TESTCASE(61,TestEscape);
        TESTCASE(62,TestAnchorMasking);
        TESTCASE(63,TestDisplayName);
        TESTCASE(64,TestSpecialCases);
        // Index 65 only exists when file I/O is compiled in.  Without it,
        // index 65 falls through to `default` and reports an empty name.
        // NOTE(review): if the harness treats an empty name as the end of the
        // test list, cases 66-84 would be skipped in a UCONFIG_NO_FILE_IO
        // build -- confirm against the harness loop.
#if !UCONFIG_NO_FILE_IO
        TESTCASE(65,TestIncrementalProgress);
#endif
        TESTCASE(66,TestSurrogateCasing);
        TESTCASE(67,TestFunction);
        TESTCASE(68,TestInvalidBackRef);
        TESTCASE(69,TestMulticharStringSet);
        TESTCASE(70,TestUserFunction);
        TESTCASE(71,TestAnyX);
        TESTCASE(72,TestSourceTargetSet);
        TESTCASE(73,TestGurmukhiDevanagari);
        TESTCASE(74,TestPatternWhiteSpace);
        TESTCASE(75,TestAllCodepoints);
        TESTCASE(76,TestBoilerplate);
        TESTCASE(77,TestAlternateSyntax);
        TESTCASE(78,TestBeginEnd);
        TESTCASE(79,TestBeginEndToRules);
        TESTCASE(80,TestRegisterAlias);
        TESTCASE(81,TestRuleStripping);
        TESTCASE(82,TestHalfwidthFullwidth);
        TESTCASE(83,TestThai);
        TESTCASE(84,TestAny);
        // No test bound to this index: report an empty name.
        default: name = ""; break;
    }
}
202
203/**
204 * Make sure every system transliterator can be instantiated.
205 *
206 * ALSO test that the result of toRules() for each rule is a valid
207 * rule.  Do this here so we don't have to have another test that
208 * instantiates everything as well.
209 */
210void TransliteratorTest::TestInstantiation() {
211    UErrorCode ec = U_ZERO_ERROR;
212    StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
213    assertSuccess("getAvailableIDs()", ec);
214    assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
215    int32_t n = Transliterator::countAvailableIDs();
216    assertTrue("getAvailableIDs().count()==countAvailableIDs()",
217               avail->count(ec) == n);
218    assertSuccess("count()", ec);
219    UnicodeString name;
220    for (int32_t i=0; i<n; ++i) {
221        const UnicodeString& id = *avail->snext(ec);
222        if (!assertSuccess("snext()", ec) ||
223            !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
224            break;
225        }
226        UnicodeString id2 = Transliterator::getAvailableID(i);
227        if (id.length() < 1) {
228            errln(UnicodeString("FAIL: getAvailableID(") +
229                  i + ") returned empty string");
230            continue;
231        }
232        if (id != id2) {
233            errln(UnicodeString("FAIL: getAvailableID(") +
234                  i + ") != getAvailableIDs().snext()");
235            continue;
236        }
237        UParseError parseError;
238        UErrorCode status = U_ZERO_ERROR;
239        Transliterator* t = Transliterator::createInstance(id,
240                              UTRANS_FORWARD, parseError,status);
241        name.truncate(0);
242        Transliterator::getDisplayName(id, name);
243        if (t == 0) {
244#if UCONFIG_NO_BREAK_ITERATION
245            // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
246            if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
247                id.compare((UnicodeString)"Thai-Latin") != 0)
248#endif
249                dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
250                      /*", parse error " + parseError.code +*/
251                      ", line " + parseError.line +
252                      ", offset " + parseError.offset +
253                      ", pre-context " + prettify(parseError.preContext, TRUE) +
254                      ", post-context " +prettify(parseError.postContext,TRUE) +
255                      ", Error: " + u_errorName(status));
256                // When createInstance fails, it deletes the failing
257                // entry from the available ID list.  We detect this
258                // here by looking for a change in countAvailableIDs.
259            int32_t nn = Transliterator::countAvailableIDs();
260            if (nn == (n - 1)) {
261                n = nn;
262                --i; // Compensate for deleted entry
263            }
264        } else {
265            logln(UnicodeString("OK: ") + name + " (" + id + ")");
266
267            // Now test toRules
268            UnicodeString rules;
269            t->toRules(rules, TRUE);
270            Transliterator *u = Transliterator::createFromRules("x",
271                                    rules, UTRANS_FORWARD, parseError,status);
272            if (u == 0) {
273                errln(UnicodeString("FAIL: ") + id +
274                      ".createFromRules() => bad rules" +
275                      /*", parse error " + parseError.code +*/
276                      ", line " + parseError.line +
277                      ", offset " + parseError.offset +
278                      ", context " + prettify(parseError.preContext, TRUE) +
279                      ", rules: " + prettify(rules, TRUE));
280            } else {
281                delete u;
282            }
283            delete t;
284        }
285    }
286    assertTrue("snext()==NULL", avail->snext(ec)==NULL);
287    assertSuccess("snext()", ec);
288    delete avail;
289
290    // Now test the failure path
291    UParseError parseError;
292    UErrorCode status = U_ZERO_ERROR;
293    UnicodeString id("<Not a valid Transliterator ID>");
294    Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
295    if (t != 0) {
296        errln("FAIL: " + id + " returned a transliterator");
297        delete t;
298    } else {
299        logln("OK: Bogus ID handled properly");
300    }
301}
302
303void TransliteratorTest::TestSimpleRules(void) {
304    /* Example: rules 1. ab>x|y
305     *                2. yc>z
306     *
307     * []|eabcd  start - no match, copy e to tranlated buffer
308     * [e]|abcd  match rule 1 - copy output & adjust cursor
309     * [ex|y]cd  match rule 2 - copy output & adjust cursor
310     * [exz]|d   no match, copy d to transliterated buffer
311     * [exzd]|   done
312     */
313    expect(UnicodeString("ab>x|y;", "") +
314           "yc>z",
315           "eabcd", "exzd");
316
317    /* Another set of rules:
318     *    1. ab>x|yzacw
319     *    2. za>q
320     *    3. qc>r
321     *    4. cw>n
322     *
323     * []|ab       Rule 1
324     * [x|yzacw]   No match
325     * [xy|zacw]   Rule 2
326     * [xyq|cw]    Rule 4
327     * [xyqn]|     Done
328     */
329    expect(UnicodeString("ab>x|yzacw;") +
330           "za>q;" +
331           "qc>r;" +
332           "cw>n",
333           "ab", "xyqn");
334
335    /* Test categories
336     */
337    UErrorCode status = U_ZERO_ERROR;
338    UParseError parseError;
339    Transliterator *t = Transliterator::createFromRules(
340        "<ID>",
341        UnicodeString("$dummy=").append((UChar)0xE100) +
342        UnicodeString(";"
343                      "$vowel=[aeiouAEIOU];"
344                      "$lu=[:Lu:];"
345                      "$vowel } $lu > '!';"
346                      "$vowel > '&';"
347                      "'!' { $lu > '^';"
348                      "$lu > '*';"
349                      "a > ERROR", ""),
350        UTRANS_FORWARD, parseError,
351        status);
352    if (U_FAILURE(status)) {
353        dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
354        return;
355    }
356    expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
357    delete t;
358}
359
360/**
361 * Test inline set syntax and set variable syntax.
362 */
363void TransliteratorTest::TestInlineSet(void) {
364    expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
365    expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
366
367    expect(UnicodeString(
368           "$digit = [0-9];"
369           "$alpha = [a-zA-Z];"
370           "$alphanumeric = [$digit $alpha];" // ***
371           "$special = [^$alphanumeric];"     // ***
372           "$alphanumeric > '-';"
373           "$special > '*';", ""),
374
375           "thx-1138", "---*----");
376}
377
378/**
379 * Create some inverses and confirm that they work.  We have to be
380 * careful how we do this, since the inverses will not be true
381 * inverses -- we can't throw any random string at the composition
382 * of the transliterators and expect the identity function.  F x
383 * F' != I.  However, if we are careful about the input, we will
384 * get the expected results.
385 */
386void TransliteratorTest::TestRuleBasedInverse(void) {
387    UnicodeString RULES =
388        UnicodeString("abc>zyx;") +
389        "ab>yz;" +
390        "bc>zx;" +
391        "ca>xy;" +
392        "a>x;" +
393        "b>y;" +
394        "c>z;" +
395
396        "abc<zyx;" +
397        "ab<yz;" +
398        "bc<zx;" +
399        "ca<xy;" +
400        "a<x;" +
401        "b<y;" +
402        "c<z;" +
403
404        "";
405
406    const char* DATA[] = {
407        // Careful here -- random strings will not work.  If we keep
408        // the left side to the domain and the right side to the range
409        // we will be okay though (left, abc; right xyz).
410        "a", "x",
411        "abcacab", "zyxxxyy",
412        "caccb", "xyzzy",
413    };
414
415    int32_t DATA_length = UPRV_LENGTHOF(DATA);
416
417    UErrorCode status = U_ZERO_ERROR;
418    UParseError parseError;
419    Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
420                                UTRANS_FORWARD, parseError, status);
421    Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
422                                UTRANS_REVERSE, parseError, status);
423    if (U_FAILURE(status)) {
424        errln("FAIL: RBT constructor failed");
425        return;
426    }
427    for (int32_t i=0; i<DATA_length; i+=2) {
428        expect(*fwd, DATA[i], DATA[i+1]);
429        expect(*rev, DATA[i+1], DATA[i]);
430    }
431    delete fwd;
432    delete rev;
433}
434
435/**
436 * Basic test of keyboard.
437 */
438void TransliteratorTest::TestKeyboard(void) {
439    UParseError parseError;
440    UErrorCode status = U_ZERO_ERROR;
441    Transliterator *t = Transliterator::createFromRules("<ID>",
442                              UnicodeString("psch>Y;")
443                              +"ps>y;"
444                              +"ch>x;"
445                              +"a>A;",
446                              UTRANS_FORWARD, parseError,
447                              status);
448    if (U_FAILURE(status)) {
449        errln("FAIL: RBT constructor failed");
450        return;
451    }
452    const char* DATA[] = {
453        // insertion, buffer
454        "a", "A",
455        "p", "Ap",
456        "s", "Aps",
457        "c", "Apsc",
458        "a", "AycA",
459        "psch", "AycAY",
460        0, "AycAY", // null means finishKeyboardTransliteration
461    };
462
463    keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
464    delete t;
465}
466
467/**
468 * Basic test of keyboard with cursor.
469 */
470void TransliteratorTest::TestKeyboard2(void) {
471    UParseError parseError;
472    UErrorCode status = U_ZERO_ERROR;
473    Transliterator *t = Transliterator::createFromRules("<ID>",
474                              UnicodeString("ych>Y;")
475                              +"ps>|y;"
476                              +"ch>x;"
477                              +"a>A;",
478                              UTRANS_FORWARD, parseError,
479                              status);
480    if (U_FAILURE(status)) {
481        errln("FAIL: RBT constructor failed");
482        return;
483    }
484    const char* DATA[] = {
485        // insertion, buffer
486        "a", "A",
487        "p", "Ap",
488        "s", "Aps", // modified for rollback - "Ay",
489        "c", "Apsc", // modified for rollback - "Ayc",
490        "a", "AycA",
491        "p", "AycAp",
492        "s", "AycAps", // modified for rollback - "AycAy",
493        "c", "AycApsc", // modified for rollback - "AycAyc",
494        "h", "AycAY",
495        0, "AycAY", // null means finishKeyboardTransliteration
496    };
497
498    keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
499    delete t;
500}
501
502/**
503 * Test keyboard transliteration with back-replacement.
504 */
505void TransliteratorTest::TestKeyboard3(void) {
506    // We want th>z but t>y.  Furthermore, during keyboard
507    // transliteration we want t>y then yh>z if t, then h are
508    // typed.
509    UnicodeString RULES("t>|y;"
510                        "yh>z;");
511
512    const char* DATA[] = {
513        // Column 1: characters to add to buffer (as if typed)
514        // Column 2: expected appearance of buffer after
515        //           keyboard xliteration.
516        "a", "a",
517        "b", "ab",
518        "t", "abt", // modified for rollback - "aby",
519        "c", "abyc",
520        "t", "abyct", // modified for rollback - "abycy",
521        "h", "abycz",
522        0, "abycz", // null means finishKeyboardTransliteration
523    };
524
525    UParseError parseError;
526    UErrorCode status = U_ZERO_ERROR;
527    Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
528    if (U_FAILURE(status)) {
529        errln("FAIL: RBT constructor failed");
530        return;
531    }
532    keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
533    delete t;
534}
535
536void TransliteratorTest::keyboardAux(const Transliterator& t,
537                                     const char* DATA[], int32_t DATA_length) {
538    UErrorCode status = U_ZERO_ERROR;
539    UTransPosition index={0, 0, 0, 0};
540    UnicodeString s;
541    for (int32_t i=0; i<DATA_length; i+=2) {
542        UnicodeString log;
543        if (DATA[i] != 0) {
544            log = s + " + "
545                + DATA[i]
546                + " -> ";
547            t.transliterate(s, index, DATA[i], status);
548        } else {
549            log = s + " => ";
550            t.finishTransliteration(s, index);
551        }
552        // Show the start index '{' and the cursor '|'
553        UnicodeString a, b, c;
554        s.extractBetween(0, index.contextStart, a);
555        s.extractBetween(index.contextStart, index.start, b);
556        s.extractBetween(index.start, s.length(), c);
557        log.append(a).
558            append((UChar)LEFT_BRACE).
559            append(b).
560            append((UChar)PIPE).
561            append(c);
562        if (s == DATA[i+1] && U_SUCCESS(status)) {
563            logln(log);
564        } else {
565            errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
566        }
567    }
568}
569
/**
 * Placeholder for the Arabic transliterator test.  The body is entirely
 * commented out, so this test currently does nothing; the old code is
 * kept below for reference.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
602
603/**
604 * Compose the Kana transliterator forward and reverse and try
605 * some strings that should come out unchanged.
606 */
607void TransliteratorTest::TestCompoundKana(void) {
608    UParseError parseError;
609    UErrorCode status = U_ZERO_ERROR;
610    Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
611    if (t == 0) {
612        dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
613    } else {
614        expect(*t, "aaaaa", "aaaaa");
615        delete t;
616    }
617}
618
619/**
620 * Compose the hex transliterators forward and reverse.
621 */
622void TransliteratorTest::TestCompoundHex(void) {
623    UParseError parseError;
624    UErrorCode status = U_ZERO_ERROR;
625    Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
626    Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
627    Transliterator* transab[] = { a, b };
628    Transliterator* transba[] = { b, a };
629    if (a == 0 || b == 0) {
630        errln("FAIL: construction failed");
631        delete a;
632        delete b;
633        return;
634    }
635    // Do some basic tests of a
636    expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
637    // Do some basic tests of b
638    expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
639
640    Transliterator* ab = new CompoundTransliterator(transab, 2);
641    UnicodeString s("abcde", "");
642    expect(*ab, s, s);
643
644    UnicodeString str(s);
645    a->transliterate(str);
646    Transliterator* ba = new CompoundTransliterator(transba, 2);
647    expect(*ba, str, str);
648
649    delete ab;
650    delete ba;
651    delete a;
652    delete b;
653}
654
655int gTestFilterClassID = 0;
656/**
657 * Used by TestFiltering().
658 */
659class TestFilter : public UnicodeFilter {
660    virtual UnicodeFunctor* clone() const {
661        return new TestFilter(*this);
662    }
663    virtual UBool contains(UChar32 c) const {
664        return c != (UChar)0x0063 /*c*/;
665    }
666    // Stubs
667    virtual UnicodeString& toPattern(UnicodeString& result,
668                                     UBool /*escapeUnprintable*/) const {
669        return result;
670    }
671    virtual UBool matchesIndexValue(uint8_t /*v*/) const {
672        return FALSE;
673    }
674    virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
675public:
676    UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
677};
678
679/**
680 * Do some basic tests of filtering.
681 */
682void TransliteratorTest::TestFiltering(void) {
683    UParseError parseError;
684    UErrorCode status = U_ZERO_ERROR;
685    Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
686    if (hex == 0) {
687        errln("FAIL: createInstance(Any-Hex) failed");
688        return;
689    }
690    hex->adoptFilter(new TestFilter());
691    UnicodeString s("abcde");
692    hex->transliterate(s);
693    UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
694    if (s == exp) {
695        logln(UnicodeString("Ok:   \"") + exp + "\"");
696    } else {
697        logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
698    }
699
700    // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
701    UnicodeFilter *f = hex->orphanFilter();
702    if (f == NULL){
703        errln("FAIL: orphanFilter() should get a UnicodeFilter");
704    } else {
705        delete f;
706    }
707    delete hex;
708}
709
710/**
711 * Test anchors
712 */
713void TransliteratorTest::TestAnchors(void) {
714    expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
715           "aaa",
716           "012");
717    expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
718           "aaa",
719           "012");
720    expect(UnicodeString("^ab  > 01 ;"
721           " ab  > |8 ;"
722           "  b  > k ;"
723           " 8x$ > 45 ;"
724           " 8x  > 77 ;", ""),
725
726           "ababbabxabx",
727           "018k7745");
728    expect(UnicodeString("$s = [z$] ;"
729           "$s{ab    > 01 ;"
730           "   ab    > |8 ;"
731           "    b    > k ;"
732           "   8x}$s > 45 ;"
733           "   8x    > 77 ;", ""),
734
735           "abzababbabxzabxabx",
736           "01z018k45z01x45");
737}
738
739/**
740 * Test pattern quoting and escape mechanisms.
741 */
742void TransliteratorTest::TestPatternQuoting(void) {
743    // Array of 3n items
744    // Each item is <rules>, <input>, <expected output>
745    const UnicodeString DATA[] = {
746        UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
747        UnicodeString(UChar(0x4E01)),
748        "[male adult]"
749    };
750
751    for (int32_t i=0; i<3; i+=3) {
752        logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
753        UParseError parseError;
754        UErrorCode status = U_ZERO_ERROR;
755        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
756        if (U_FAILURE(status)) {
757            errln("RBT constructor failed");
758        } else {
759            expect(*t, DATA[i+1], DATA[i+2]);
760        }
761        delete t;
762    }
763}
764
765/**
766 * Regression test for bugs found in Greek transliteration.
767 */
768void TransliteratorTest::TestJ277(void) {
769    UErrorCode status = U_ZERO_ERROR;
770    UParseError parseError;
771    Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
772    if (gl == NULL) {
773        dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
774        return;
775    }
776
777    UChar sigma = 0x3C3;
778    UChar upsilon = 0x3C5;
779    UChar nu = 0x3BD;
780//    UChar PHI = 0x3A6;
781    UChar alpha = 0x3B1;
782//    UChar omega = 0x3C9;
783//    UChar omicron = 0x3BF;
784//    UChar epsilon = 0x3B5;
785
786    // sigma upsilon nu -> syn
787    UnicodeString syn;
788    syn.append(sigma).append(upsilon).append(nu);
789    expect(*gl, syn, "syn");
790
791    // sigma alpha upsilon nu -> saun
792    UnicodeString sayn;
793    sayn.append(sigma).append(alpha).append(upsilon).append(nu);
794    expect(*gl, sayn, "saun");
795
796    // Again, using a smaller rule set
797    UnicodeString rules(
798                "$alpha   = \\u03B1;"
799                "$nu      = \\u03BD;"
800                "$sigma   = \\u03C3;"
801                "$ypsilon = \\u03C5;"
802                "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
803                "s <>           $sigma;"
804                "a <>           $alpha;"
805                "u <>  $vowel { $ypsilon;"
806                "y <>           $ypsilon;"
807                "n <>           $nu;",
808                "");
809    Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
810    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
811    expect(*mini, syn, "syn");
812    expect(*mini, sayn, "saun");
813    delete mini;
814    mini = NULL;
815
816#if !UCONFIG_NO_FORMATTING
817    // Transliterate the Greek locale data
818    Locale el("el");
819    DateFormatSymbols syms(el, status);
820    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
821    int32_t i, count;
822    const UnicodeString* data = syms.getMonths(count);
823    for (i=0; i<count; ++i) {
824        if (data[i].length() == 0) {
825            continue;
826        }
827        UnicodeString out(data[i]);
828        gl->transliterate(out);
829        UBool ok = TRUE;
830        if (data[i].length() >= 2 && out.length() >= 2 &&
831            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
832            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
833                ok = FALSE;
834            }
835        }
836        if (ok) {
837            logln(prettify(data[i] + " -> " + out));
838        } else {
839            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
840        }
841    }
842#endif
843
844    delete gl;
845}
846
847/**
848 * Prefix, suffix support in hex transliterators
849 */
850void TransliteratorTest::TestJ243(void) {
851    UErrorCode ec = U_ZERO_ERROR;
852
853    // Test default Hex-Any, which should handle
854    // \u, \U, u+, and U+
855    Transliterator *hex =
856        Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
857    if (assertSuccess("getInstance", ec)) {
858        expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
859    }
860    delete hex;
861
862//    // Try a custom Hex-Unicode
863//    // \uXXXX and &#xXXXX;
864//    ec = U_ZERO_ERROR;
865//    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
866//    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
867//           "abcd5fx012&#x00033;");
868//    // Try custom Any-Hex (default is tested elsewhere)
869//    ec = U_ZERO_ERROR;
870//    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
871//    expect(hex3, "012", "&#x30;&#x31;&#x32;");
872}
873
874/**
875 * Parsers need better syntax error messages.
876 */
877void TransliteratorTest::TestJ329(void) {
878
879    struct { UBool containsErrors; const char* rule; } DATA[] = {
880        { FALSE, "a > b; c > d" },
881        { TRUE,  "a > b; no operator; c > d" },
882    };
883    int32_t DATA_length = UPRV_LENGTHOF(DATA);
884
885    for (int32_t i=0; i<DATA_length; ++i) {
886        UErrorCode status = U_ZERO_ERROR;
887        UParseError parseError;
888        Transliterator *rbt = Transliterator::createFromRules("<ID>",
889                                    DATA[i].rule,
890                                    UTRANS_FORWARD,
891                                    parseError,
892                                    status);
893        UBool gotError = U_FAILURE(status);
894        UnicodeString desc(DATA[i].rule);
895        desc.append(gotError ? " -> error" : " -> no error");
896        if (gotError) {
897            desc = desc + ", ParseError code=" + u_errorName(status) +
898                " line=" + parseError.line +
899                " offset=" + parseError.offset +
900                " context=" + parseError.preContext;
901        }
902        if (gotError == DATA[i].containsErrors) {
903            logln(UnicodeString("Ok:   ") + desc);
904        } else {
905            errln(UnicodeString("FAIL: ") + desc);
906        }
907        delete rbt;
908    }
909}
910
911/**
912 * Test segments and segment references.
913 */
914void TransliteratorTest::TestSegments(void) {
915    // Array of 3n items
916    // Each item is <rules>, <input>, <expected output>
917    UnicodeString DATA[] = {
918        "([a-z]) '.' ([0-9]) > $2 '-' $1",
919        "abc.123.xyz.456",
920        "ab1-c23.xy4-z56",
921
922        // nested
923        "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
924        "a1 b2",
925        "a1.a.1 b2.b.2",
926    };
927    int32_t DATA_length = UPRV_LENGTHOF(DATA);
928
929    for (int32_t i=0; i<DATA_length; i+=3) {
930        logln("Pattern: " + prettify(DATA[i]));
931        UParseError parseError;
932        UErrorCode status = U_ZERO_ERROR;
933        Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
934        if (U_FAILURE(status)) {
935            errln("FAIL: RBT constructor");
936        } else {
937            expect(*t, DATA[i+1], DATA[i+2]);
938        }
939        delete t;
940    }
941}
942
943/**
944 * Test cursor positioning outside of the key
945 */
946void TransliteratorTest::TestCursorOffset(void) {
947    // Array of 3n items
948    // Each item is <rules>, <input>, <expected output>
949    UnicodeString DATA[] = {
950        "pre {alpha} post > | @ ALPHA ;"
951        "eALPHA > beta ;"
952        "pre {beta} post > BETA @@ | ;"
953        "post > xyz",
954
955        "prealphapost prebetapost",
956
957        "prbetaxyz preBETApost",
958    };
959    int32_t DATA_length = UPRV_LENGTHOF(DATA);
960
961    for (int32_t i=0; i<DATA_length; i+=3) {
962        logln("Pattern: " + prettify(DATA[i]));
963        UParseError parseError;
964        UErrorCode status = U_ZERO_ERROR;
965        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
966        if (U_FAILURE(status)) {
967            errln("FAIL: RBT constructor");
968        } else {
969            expect(*t, DATA[i+1], DATA[i+2]);
970        }
971        delete t;
972    }
973}
974
975/**
976 * Test zero length and > 1 char length variable values.  Test
977 * use of variable refs in UnicodeSets.
978 */
979void TransliteratorTest::TestArbitraryVariableValues(void) {
980    // Array of 3n items
981    // Each item is <rules>, <input>, <expected output>
982    UnicodeString DATA[] = {
983        "$abe = ab;"
984        "$pat = x[yY]z;"
985        "$ll  = 'a-z';"
986        "$llZ = [$ll];"
987        "$llY = [$ll$pat];"
988        "$emp = ;"
989
990        "$abe > ABE;"
991        "$pat > END;"
992        "$llZ > 1;"
993        "$llY > 2;"
994        "7$emp 8 > 9;"
995        "",
996
997        "ab xYzxyz stY78",
998        "ABE ENDEND 1129",
999    };
1000    int32_t DATA_length = UPRV_LENGTHOF(DATA);
1001
1002    for (int32_t i=0; i<DATA_length; i+=3) {
1003        logln("Pattern: " + prettify(DATA[i]));
1004        UParseError parseError;
1005        UErrorCode status = U_ZERO_ERROR;
1006        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1007        if (U_FAILURE(status)) {
1008            errln("FAIL: RBT constructor");
1009        } else {
1010            expect(*t, DATA[i+1], DATA[i+2]);
1011        }
1012        delete t;
1013    }
1014}
1015
1016/**
1017 * Confirm that the contextStart, contextLimit, start, and limit
1018 * behave correctly. J474.
1019 */
1020void TransliteratorTest::TestPositionHandling(void) {
1021    // Array of 3n items
1022    // Each item is <rules>, <input>, <expected output>
1023    const char* DATA[] = {
1024        "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1025        "xtat txtb", // pos 0,9,0,9
1026        "xTTaSS TTxUUb",
1027
1028        "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1029        "xtat txtb", // pos 2,9,3,8
1030        "xtaSS TTxUUb",
1031
1032        "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1033        "xtat txtb", // pos 3,8,3,8
1034        "xtaTT TTxTTb",
1035    };
1036
1037    // Array of 4n positions -- these go with the DATA array
1038    // They are: contextStart, contextLimit, start, limit
1039    int32_t POS[] = {
1040        0, 9, 0, 9,
1041        2, 9, 3, 8,
1042        3, 8, 3, 8,
1043    };
1044
1045    int32_t n = UPRV_LENGTHOF(DATA) / 3;
1046    for (int32_t i=0; i<n; i++) {
1047        UErrorCode status = U_ZERO_ERROR;
1048        UParseError parseError;
1049        Transliterator *t = Transliterator::createFromRules("<ID>",
1050                                DATA[3*i], UTRANS_FORWARD, parseError, status);
1051        if (U_FAILURE(status)) {
1052            delete t;
1053            errln("FAIL: RBT constructor");
1054            return;
1055        }
1056        UTransPosition pos;
1057        pos.contextStart= POS[4*i];
1058        pos.contextLimit = POS[4*i+1];
1059        pos.start = POS[4*i+2];
1060        pos.limit = POS[4*i+3];
1061        UnicodeString rsource(DATA[3*i+1]);
1062        t->transliterate(rsource, pos, status);
1063        if (U_FAILURE(status)) {
1064            delete t;
1065            errln("FAIL: transliterate");
1066            return;
1067        }
1068        t->finishTransliteration(rsource, pos);
1069        expectAux(DATA[3*i],
1070                  DATA[3*i+1],
1071                  rsource,
1072                  DATA[3*i+2]);
1073        delete t;
1074    }
1075}
1076
1077/**
1078 * Test the Hiragana-Katakana transliterator.
1079 */
1080void TransliteratorTest::TestHiraganaKatakana(void) {
1081    UParseError parseError;
1082    UErrorCode status = U_ZERO_ERROR;
1083    Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1084    Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1085    if (hk == 0 || kh == 0) {
1086        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1087        delete hk;
1088        delete kh;
1089        return;
1090    }
1091
1092    // Array of 3n items
1093    // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1094    const char* DATA[] = {
1095        "both",
1096        "\\u3042\\u3090\\u3099\\u3092\\u3050",
1097        "\\u30A2\\u30F8\\u30F2\\u30B0",
1098
1099        "kh",
1100        "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1101        "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1102    };
1103    int32_t DATA_length = UPRV_LENGTHOF(DATA);
1104
1105    for (int32_t i=0; i<DATA_length; i+=3) {
1106        UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1107        UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1108        switch (*DATA[i]) {
1109        case 0x68: //'h': // Hiragana-Katakana
1110            expect(*hk, h, k);
1111            break;
1112        case 0x6B: //'k': // Katakana-Hiragana
1113            expect(*kh, k, h);
1114            break;
1115        case 0x62: //'b': // both
1116            expect(*hk, h, k);
1117            expect(*kh, k, h);
1118            break;
1119        }
1120    }
1121    delete hk;
1122    delete kh;
1123}
1124
1125/**
1126 * Test cloning / copy constructor of RBT.
1127 */
1128void TransliteratorTest::TestCopyJ476(void) {
1129    // The real test here is what happens when the destructors are
1130    // called.  So we let one object get destructed, and check to
1131    // see that its copy still works.
1132    Transliterator *t2 = 0;
1133    {
1134        UParseError parseError;
1135        UErrorCode status = U_ZERO_ERROR;
1136        Transliterator *t1 = Transliterator::createFromRules("t1",
1137            "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1138        if (U_FAILURE(status)) {
1139            errln("FAIL: RBT constructor");
1140            return;
1141        }
1142        t2 = t1->clone(); // Call copy constructor under the covers.
1143        expect(*t1, "abcfoofoo", "ABcbar");
1144        delete t1;
1145    }
1146    expect(*t2, "abcfoofoo", "ABcbar");
1147    delete t2;
1148}
1149
1150/**
1151 * Test inter-Indic transliterators.  These are composed.
1152 * ICU4C Jitterbug 483.
1153 */
1154void TransliteratorTest::TestInterIndic(void) {
1155    UnicodeString ID("Devanagari-Gujarati", "");
1156    UErrorCode status = U_ZERO_ERROR;
1157    UParseError parseError;
1158    Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1159    if (dg == 0) {
1160        dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1161        return;
1162    }
1163    UnicodeString id = dg->getID();
1164    if (id != ID) {
1165        errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1166    }
1167    UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1168    UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1169    expect(*dg, dev, guj);
1170    delete dg;
1171}
1172
1173/**
1174 * Test filter syntax in IDs. (J918)
1175 */
1176void TransliteratorTest::TestFilterIDs(void) {
1177    // Array of 3n strings:
1178    // <id>, <inverse id>, <input>, <expected output>
1179    const char* DATA[] = {
1180        "[aeiou]Any-Hex", // ID
1181        "[aeiou]Hex-Any", // expected inverse ID
1182        "quizzical",      // src
1183        "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1184
1185        "[aeiou]Any-Hex;[^5]Hex-Any",
1186        "[^5]Any-Hex;[aeiou]Hex-Any",
1187        "quizzical",
1188        "q\\u0075izzical",
1189
1190        "[abc]Null",
1191        "[abc]Null",
1192        "xyz",
1193        "xyz",
1194    };
1195    enum { DATA_length = UPRV_LENGTHOF(DATA) };
1196
1197    for (int i=0; i<DATA_length; i+=4) {
1198        UnicodeString ID(DATA[i], "");
1199        UnicodeString uID(DATA[i+1], "");
1200        UnicodeString data2(DATA[i+2], "");
1201        UnicodeString data3(DATA[i+3], "");
1202        UParseError parseError;
1203        UErrorCode status = U_ZERO_ERROR;
1204        Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1205        if (t == 0) {
1206            errln("FAIL: createInstance(" + ID + ") returned NULL");
1207            return;
1208        }
1209        expect(*t, data2, data3);
1210
1211        // Check the ID
1212        if (ID != t->getID()) {
1213            errln("FAIL: createInstance(" + ID + ").getID() => " +
1214                  t->getID());
1215        }
1216
1217        // Check the inverse
1218        Transliterator *u = t->createInverse(status);
1219        if (u == 0) {
1220            errln("FAIL: " + ID + ".createInverse() returned NULL");
1221        } else if (u->getID() != uID) {
1222            errln("FAIL: " + ID + ".createInverse().getID() => " +
1223                  u->getID() + ", expected " + uID);
1224        }
1225
1226        delete t;
1227        delete u;
1228    }
1229}
1230
1231/**
1232 * Test the case mapping transliterators.
1233 */
1234void TransliteratorTest::TestCaseMap(void) {
1235    UParseError parseError;
1236    UErrorCode status = U_ZERO_ERROR;
1237    Transliterator* toUpper =
1238        Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239    Transliterator* toLower =
1240        Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241    Transliterator* toTitle =
1242        Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1243    if (toUpper==0 || toLower==0 || toTitle==0) {
1244        errln("FAIL: createInstance returned NULL");
1245        delete toUpper;
1246        delete toLower;
1247        delete toTitle;
1248        return;
1249    }
1250
1251    expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1252           "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1253    expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1254           "the quick brown foX jumped over the lazY dogs.");
1255    expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1256           "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1257
1258    delete toUpper;
1259    delete toLower;
1260    delete toTitle;
1261}
1262
1263/**
1264 * Test the name mapping transliterators.
1265 */
1266void TransliteratorTest::TestNameMap(void) {
1267    UParseError parseError;
1268    UErrorCode status = U_ZERO_ERROR;
1269    Transliterator* uni2name =
1270        Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1271    Transliterator* name2uni =
1272        Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1273    if (uni2name==0 || name2uni==0) {
1274        errln("FAIL: createInstance returned NULL");
1275        delete uni2name;
1276        delete name2uni;
1277        return;
1278    }
1279
1280    // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1281    expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1282           CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1283    expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1284           CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1285
1286    delete uni2name;
1287    delete name2uni;
1288
1289    // round trip
1290    Transliterator* t =
1291        Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1292    if (t==0) {
1293        errln("FAIL: createInstance returned NULL");
1294        delete t;
1295        return;
1296    }
1297
1298    // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1299    UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1300    expect(*t, s, s);
1301    delete t;
1302}
1303
1304/**
1305 * Test liberalized ID syntax.  1006c
1306 */
1307void TransliteratorTest::TestLiberalizedID(void) {
1308    // Some test cases have an expected getID() value of NULL.  This
1309    // means I have disabled the test case for now.  This stuff is
1310    // still under development, and I haven't decided whether to make
1311    // getID() return canonical case yet.  It will all get rewritten
1312    // with the move to Source-Target/Variant IDs anyway. [aliu]
1313    const char* DATA[] = {
1314        "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1315        "  Null  ", "Null", "whitespace",
1316        " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1317        "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1318    };
1319    const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1320    UParseError parseError;
1321    UErrorCode status= U_ZERO_ERROR;
1322    for (int32_t i=0; i<DATA_length; i+=3) {
1323        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1324        if (t == 0) {
1325            dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1326                  " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1327        } else {
1328            UnicodeString exp;
1329            if (DATA[i+1]) {
1330                exp = UnicodeString(DATA[i+1], "");
1331            }
1332            // Don't worry about getID() if the expected char*
1333            // is NULL -- see above.
1334            if (exp.length() == 0 || exp == t->getID()) {
1335                logln(UnicodeString("Ok: ") + DATA[i+2] +
1336                      " create ID \"" + DATA[i] + "\" => \"" +
1337                      exp + "\"");
1338            } else {
1339                errln(UnicodeString("FAIL: ") + DATA[i+2] +
1340                      " create ID \"" + DATA[i] + "\" => \"" +
1341                      t->getID() + "\", exp \"" + exp + "\"");
1342            }
1343            delete t;
1344        }
1345    }
1346}
1347
1348/* test for Jitterbug 912 */
1349void TransliteratorTest::TestCreateInstance(){
1350    const char* FORWARD = "F";
1351    const char* REVERSE = "R";
1352    const char* DATA[] = {
1353        // Column 1: id
1354        // Column 2: direction
1355        // Column 3: expected ID, or "" if expect failure
1356        "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1357
1358        // JB#2689: bad compound causes crash
1359        "InvalidSource-InvalidTarget", FORWARD, "",
1360        "InvalidSource-InvalidTarget", REVERSE, "",
1361        "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1362        "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1363        "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1364        "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1365
1366        NULL
1367    };
1368
1369    for (int32_t i=0; DATA[i]; i+=3) {
1370        UParseError err;
1371        UErrorCode ec = U_ZERO_ERROR;
1372        UnicodeString id(DATA[i]);
1373        UTransDirection dir = (DATA[i+1]==FORWARD)?
1374            UTRANS_FORWARD:UTRANS_REVERSE;
1375        UnicodeString expID(DATA[i+2]);
1376        Transliterator* t =
1377            Transliterator::createInstance(id,dir,err,ec);
1378        UnicodeString newID;
1379        if (t) {
1380            newID = t->getID();
1381        }
1382        UBool ok = (newID == expID);
1383        if (!t) {
1384            newID = u_errorName(ec);
1385        }
1386        if (ok) {
1387            logln((UnicodeString)"Ok: createInstance(" +
1388                  id + "," + DATA[i+1] + ") => " + newID);
1389        } else {
1390            dataerrln((UnicodeString)"FAIL: createInstance(" +
1391                  id + "," + DATA[i+1] + ") => " + newID +
1392                  ", expected " + expID);
1393        }
1394        delete t;
1395    }
1396}
1397
1398/**
1399 * Test the normalization transliterator.
1400 */
1401void TransliteratorTest::TestNormalizationTransliterator() {
1402    // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1403    // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1404    const char* CANON[] = {
1405        // Input               Decomposed            Composed
1406        "cat",                "cat",                "cat"               ,
1407        "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1408
1409        "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1410        "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1411
1412        "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1413        "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1414        "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1415
1416        "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1417        "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1418
1419        "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1420        "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1421        "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1422
1423        "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1424        "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1425
1426        "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1427        "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1428
1429        "Henry IV",           "Henry IV",           "Henry IV"          ,
1430        "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1431
1432        "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1433        "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1434        "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1435        "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1436        "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1437
1438        "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1439        0 // end
1440    };
1441
1442    const char* COMPAT[] = {
1443        // Input               Decomposed            Composed
1444        "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1445
1446        "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1447        "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1448
1449        "Henry IV",           "Henry IV",           "Henry IV"          ,
1450        "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1451
1452        "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1453        "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1454
1455        "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1456        0 // end
1457    };
1458
1459    int32_t i;
1460    UParseError parseError;
1461    UErrorCode status = U_ZERO_ERROR;
1462    Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1463    Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1464    if (!NFD || !NFC) {
1465        dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1466        delete NFD;
1467        delete NFC;
1468        return;
1469    }
1470    for (i=0; CANON[i]; i+=3) {
1471        UnicodeString in = CharsToUnicodeString(CANON[i]);
1472        UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1473        UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1474        expect(*NFD, in, expd);
1475        expect(*NFC, in, expc);
1476    }
1477    delete NFD;
1478    delete NFC;
1479
1480    Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1481    Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1482    if (!NFKD || !NFKC) {
1483        dataerrln("FAIL: createInstance failed");
1484        delete NFKD;
1485        delete NFKC;
1486        return;
1487    }
1488    for (i=0; COMPAT[i]; i+=3) {
1489        UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1490        UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1491        UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1492        expect(*NFKD, in, expkd);
1493        expect(*NFKC, in, expkc);
1494    }
1495    delete NFKD;
1496    delete NFKC;
1497
1498    UParseError pe;
1499    status = U_ZERO_ERROR;
1500    Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1501                                                       UTRANS_FORWARD,
1502                                                       pe, status);
1503    if (t == 0) {
1504        errln("FAIL: createInstance failed");
1505    }
1506    expect(*t, CharsToUnicodeString("\\u010dx"),
1507           CharsToUnicodeString("c\\u030C"));
1508    delete t;
1509}
1510
1511/**
1512 * Test compound RBT rules.
1513 */
1514void TransliteratorTest::TestCompoundRBT(void) {
1515    // Careful with spacing and ';' here:  Phrase this exactly
1516    // as toRules() is going to return it.  If toRules() changes
1517    // with regard to spacing or ';', then adjust this string.
1518    UnicodeString rule("::Hex-Any;\n"
1519                       "::Any-Lower;\n"
1520                       "a > '.A.';\n"
1521                       "b > '.B.';\n"
1522                       "::[^t]Any-Upper;", "");
1523    UParseError parseError;
1524    UErrorCode status = U_ZERO_ERROR;
1525    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1526    if (t == 0) {
1527        errln("FAIL: createFromRules failed");
1528        return;
1529    }
1530    expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1531           "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1532    UnicodeString r;
1533    t->toRules(r, TRUE);
1534    if (r == rule) {
1535        logln((UnicodeString)"OK: toRules() => " + r);
1536    } else {
1537        errln((UnicodeString)"FAIL: toRules() => " + r +
1538              ", expected " + rule);
1539    }
1540    delete t;
1541
1542    // Now test toRules
1543    t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1544    if (t == 0) {
1545        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1546        return;
1547    }
1548    UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1549    t->toRules(r, TRUE);
1550    if (r != exp) {
1551        errln((UnicodeString)"FAIL: toRules() => " + r +
1552              ", expected " + exp);
1553    } else {
1554        logln((UnicodeString)"OK: toRules() => " + r);
1555    }
1556    delete t;
1557
1558    // Round trip the result of toRules
1559    t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1560    if (t == 0) {
1561        errln("FAIL: createFromRules #2 failed");
1562        return;
1563    } else {
1564        logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1565    }
1566
1567    // Test toRules again
1568    t->toRules(r, TRUE);
1569    if (r != exp) {
1570        errln((UnicodeString)"FAIL: toRules() => " + r +
1571              ", expected " + exp);
1572    } else {
1573        logln((UnicodeString)"OK: toRules() => " + r);
1574    }
1575
1576    delete t;
1577
1578    // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1579    // to what the regenerated ID will look like.
1580    UnicodeString id("Upper(Lower);(NFKC)", "");
1581    t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1582    if (t == 0) {
1583        errln("FAIL: createInstance #2 failed");
1584        return;
1585    }
1586    if (t->getID() == id) {
1587        logln((UnicodeString)"OK: created " + id);
1588    } else {
1589        errln((UnicodeString)"FAIL: createInstance(" + id +
1590              ").getID() => " + t->getID());
1591    }
1592
1593    Transliterator *u = t->createInverse(status);
1594    if (u == 0) {
1595        errln("FAIL: createInverse failed");
1596        delete t;
1597        return;
1598    }
1599    exp = "NFKC();Lower(Upper)";
1600    if (u->getID() == exp) {
1601        logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1602              u->getID());
1603    } else {
1604        errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1605              u->getID());
1606    }
1607    delete t;
1608    delete u;
1609}
1610
1611/**
1612 * Compound filter semantics were orginially not implemented
1613 * correctly.  Originally, each component filter f(i) is replaced by
1614 * f'(i) = f(i) && g, where g is the filter for the compound
1615 * transliterator.
1616 *
1617 * From Mark:
1618 *
1619 * Suppose and I have a transliterator X. Internally X is
1620 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1621 *
1622 * The compound should convert all greek characters (through latin) to
1623 * cyrillic, then lowercase the result. The filter should say "don't
1624 * touch 'A' in the original". But because an intermediate result
1625 * happens to go through "A", the Greek Alpha gets hung up.
1626 */
1627void TransliteratorTest::TestCompoundFilter(void) {
1628    UParseError parseError;
1629    UErrorCode status = U_ZERO_ERROR;
1630    Transliterator *t = Transliterator::createInstance
1631        ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1632    if (t == 0) {
1633        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1634        return;
1635    }
1636    t->adoptFilter(new UnicodeSet("[^A]", status));
1637    if (U_FAILURE(status)) {
1638        errln("FAIL: UnicodeSet ct failed");
1639        delete t;
1640        return;
1641    }
1642
1643    // Only the 'A' at index 1 should remain unchanged
1644    expect(*t,
1645           CharsToUnicodeString("BA\\u039A\\u0391"),
1646           CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1647    delete t;
1648}
1649
1650void TransliteratorTest::TestRemove(void) {
1651    UParseError parseError;
1652    UErrorCode status = U_ZERO_ERROR;
1653    Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1654    if (t == 0) {
1655        errln("FAIL: createInstance failed");
1656        return;
1657    }
1658
1659    expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1660
1661    // extra test for RemoveTransliterator::clone(), which at one point wasn't
1662    // duplicating the filter
1663    Transliterator* t2 = t->clone();
1664    expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1665
1666    delete t;
1667    delete t2;
1668}
1669
1670void TransliteratorTest::TestToRules(void) {
1671    const char* RBT = "rbt";
1672    const char* SET = "set";
1673    static const char* DATA[] = {
1674        RBT,
1675        "$a=\\u4E61; [$a] > A;",
1676        "[\\u4E61] > A;",
1677
1678        RBT,
1679        "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1680        "[[:Zs:][:Zl:]]{a} > A;",
1681
1682        SET,
1683        "[[:Zs:][:Zl:]]",
1684        "[[:Zs:][:Zl:]]",
1685
1686        SET,
1687        "[:Ps:]",
1688        "[:Ps:]",
1689
1690        SET,
1691        "[:L:]",
1692        "[:L:]",
1693
1694        SET,
1695        "[[:L:]-[A]]",
1696        "[[:L:]-[A]]",
1697
1698        SET,
1699        "[~[:Lu:][:Ll:]]",
1700        "[~[:Lu:][:Ll:]]",
1701
1702        SET,
1703        "[~[a-z]]",
1704        "[~[a-z]]",
1705
1706        RBT,
1707        "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1708        "[^[:Zs:]]{a} > A;",
1709
1710        RBT,
1711        "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1712        "[[a-z]-[:Zs:]]{a} > A;",
1713
1714        RBT,
1715        "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1716        "[[:Zs:]&[a-z]]{a} > A;",
1717
1718        RBT,
1719        "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1720        "[x[:Zs:]]{a} > A;",
1721
1722        RBT,
1723        "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1724        "$macron = \\u0304 ;"
1725        "$evowel = [aeiouyAEIOUY] ;"
1726        "$iotasub = \\u0345 ;"
1727        "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1728        "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1729
1730        RBT,
1731        "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732        "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733    };
1734    static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1735
1736    for (int32_t d=0; d < DATA_length; d+=3) {
1737        if (DATA[d] == RBT) {
1738            // Transliterator test
1739            UParseError parseError;
1740            UErrorCode status = U_ZERO_ERROR;
1741            Transliterator *t = Transliterator::createFromRules("ID",
1742                                                                UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1743            if (t == 0) {
1744                dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1745                return;
1746            }
1747            UnicodeString rules, escapedRules;
1748            t->toRules(rules, FALSE);
1749            t->toRules(escapedRules, TRUE);
1750            UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1751            UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1752            if (rules == expRules) {
1753                logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754                      " => " + rules);
1755            } else {
1756                errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1757                      " => " + rules + ", exp " + expRules);
1758            }
1759            if (escapedRules == expEscapedRules) {
1760                logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761                      " => " + escapedRules);
1762            } else {
1763                errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1764                      " => " + escapedRules + ", exp " + expEscapedRules);
1765            }
1766            delete t;
1767
1768        } else {
1769            // UnicodeSet test
1770            UErrorCode status = U_ZERO_ERROR;
1771            UnicodeString pat(DATA[d+1], -1, US_INV);
1772            UnicodeString expToPat(DATA[d+2], -1, US_INV);
1773            UnicodeSet set(pat, status);
1774            if (U_FAILURE(status)) {
1775                errln("FAIL: UnicodeSet ct failed");
1776                return;
1777            }
1778            // Adjust spacing etc. as necessary.
1779            UnicodeString toPat;
1780            set.toPattern(toPat);
1781            if (expToPat == toPat) {
1782                logln((UnicodeString)"Ok: " + pat +
1783                      " => " + toPat);
1784            } else {
1785                errln((UnicodeString)"FAIL: " + pat +
1786                      " => " + prettify(toPat, TRUE) +
1787                      ", exp " + prettify(pat, TRUE));
1788            }
1789        }
1790    }
1791}
1792
1793void TransliteratorTest::TestContext() {
1794    UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1795    expect("de > x; {d}e > y;",
1796           "de",
1797           "ye",
1798           &pos);
1799
1800    expect("ab{c} > z;",
1801           "xadabdabcy",
1802           "xadabdabzy");
1803}
1804
1805void TransliteratorTest::TestSupplemental() {
1806
1807    expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1808                                "a > $a; $s > i;"),
1809           CharsToUnicodeString("ab\\U0001030Fx"),
1810           CharsToUnicodeString("\\U00010300bix"));
1811
1812    expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1813                                "$b=[A-Z\\U00010400-\\U0001044D];"
1814                                "($a)($b) > $2 $1;"),
1815           CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1816           CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1817
1818    // k|ax\\U00010300xm
1819
1820    // k|a\\U00010400\\U00010300xm
1821    // ky|\\U00010400\\U00010300xm
1822    // ky\\U00010400|\\U00010300xm
1823
1824    // ky\\U00010400|\\U00010300\\U00010400m
1825    // ky\\U00010400y|\\U00010400m
1826    expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1827                                "$a {x} > | @ \\U00010400;"
1828                                "{$a} [^\\u0000-\\uFFFF] > y;"),
1829           CharsToUnicodeString("kax\\U00010300xm"),
1830           CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1831
1832    expectT("Any-Name",
1833           CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1834           UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1835
1836    expectT("Any-Hex/Unicode",
1837           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838           UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1839
1840    expectT("Any-Hex/C",
1841           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842           UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1843
1844    expectT("Any-Hex/Perl",
1845           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846           UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1847
1848    expectT("Any-Hex/Java",
1849           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850           UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1851
1852    expectT("Any-Hex/XML",
1853           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854           "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1855
1856    expectT("Any-Hex/XML10",
1857           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1858           "&#66352;&#1113856;&#917601;&#160;");
1859
1860    expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1861           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1862           CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1863}
1864
1865void TransliteratorTest::TestQuantifier() {
1866
1867    // Make sure @ in a quantified anteContext works
1868    expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1869           "AAAAAb",
1870           "aaa(aac)");
1871
1872    // Make sure @ in a quantified postContext works
1873    expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1874           "baaaaa",
1875           "caa(aaa)");
1876
1877    // Make sure @ in a quantified postContext with seg ref works
1878    expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1879           "baaaaa",
1880           "baa(aaa)");
1881
1882    // Make sure @ past ante context doesn't enter ante context
1883    UTransPosition pos = {0, 5, 3, 5};
1884    expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1885           "xxxab",
1886           "xxx(ac)",
1887           &pos);
1888
1889    // Make sure @ past post context doesn't pass limit
1890    UTransPosition pos2 = {0, 4, 0, 2};
1891    expect("{b} a+ > c @@ |; x > y; a > A;",
1892           "baxx",
1893           "caxx",
1894           &pos2);
1895
1896    // Make sure @ past post context doesn't enter post context
1897    expect("{b} a+ > c @@ |; x > y; a > A;",
1898           "baxx",
1899           "cayy");
1900
1901    expect("(ab)? c > d;",
1902           "c abc ababc",
1903           "d d abd");
1904
1905    // NOTE: The (ab)+ when referenced just yields a single "ab",
1906    // not the full sequence of them.  This accords with perl behavior.
1907    expect("(ab)+ {x} > '(' $1 ')';",
1908           "x abx ababxy",
1909           "x ab(ab) abab(ab)y");
1910
1911    expect("b+ > x;",
1912           "ac abc abbc abbbc",
1913           "ac axc axc axc");
1914
1915    expect("[abc]+ > x;",
1916           "qac abrc abbcs abtbbc",
1917           "qx xrx xs xtx");
1918
1919    expect("q{(ab)+} > x;",
1920           "qa qab qaba qababc qaba",
1921           "qa qx qxa qxc qxa");
1922
1923    expect("q(ab)* > x;",
1924           "qa qab qaba qababc",
1925           "xa x xa xc");
1926
1927    // NOTE: The (ab)+ when referenced just yields a single "ab",
1928    // not the full sequence of them.  This accords with perl behavior.
1929    expect("q(ab)* > '(' $1 ')';",
1930           "qa qab qaba qababc",
1931           "()a (ab) (ab)a (ab)c");
1932
1933    // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1934    // quoted string
1935    expect("'ab'+ > x;",
1936           "bb ab ababb",
1937           "bb x xb");
1938
1939    // $foo+ and $foo* -- the quantifier should apply to the entire
1940    // variable reference
1941    expect("$var = ab; $var+ > x;",
1942           "bb ab ababb",
1943           "bb x xb");
1944}
1945
1946class TestTrans : public Transliterator {
1947public:
1948    TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1949    }
1950    virtual Transliterator* clone(void) const {
1951        return new TestTrans(getID());
1952    }
1953    virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1954        UBool /*isIncremental*/) const
1955    {
1956        offsets.start = offsets.limit;
1957    }
1958    virtual UClassID getDynamicClassID() const;
1959    static UClassID U_EXPORT2 getStaticClassID();
1960};
1961UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1962
1963/**
1964 * Test Source-Target/Variant.
1965 */
1966void TransliteratorTest::TestSTV(void) {
1967    int32_t ns = Transliterator::countAvailableSources();
1968    if (ns < 0 || ns > 255) {
1969        errln((UnicodeString)"FAIL: Bad source count: " + ns);
1970        return;
1971    }
1972    int32_t i, j;
1973    for (i=0; i<ns; ++i) {
1974        UnicodeString source;
1975        Transliterator::getAvailableSource(i, source);
1976        logln((UnicodeString)"" + i + ": " + source);
1977        if (source.length() == 0) {
1978            errln("FAIL: empty source");
1979            continue;
1980        }
1981        int32_t nt = Transliterator::countAvailableTargets(source);
1982        if (nt < 0 || nt > 255) {
1983            errln((UnicodeString)"FAIL: Bad target count: " + nt);
1984            continue;
1985        }
1986        for (int32_t j=0; j<nt; ++j) {
1987            UnicodeString target;
1988            Transliterator::getAvailableTarget(j, source, target);
1989            logln((UnicodeString)" " + j + ": " + target);
1990            if (target.length() == 0) {
1991                errln("FAIL: empty target");
1992                continue;
1993            }
1994            int32_t nv = Transliterator::countAvailableVariants(source, target);
1995            if (nv < 0 || nv > 255) {
1996                errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1997                continue;
1998            }
1999            for (int32_t k=0; k<nv; ++k) {
2000                UnicodeString variant;
2001                Transliterator::getAvailableVariant(k, source, target, variant);
2002                if (variant.length() == 0) {
2003                    logln((UnicodeString)"  " + k + ": <empty>");
2004                } else {
2005                    logln((UnicodeString)"  " + k + ": " + variant);
2006                }
2007            }
2008        }
2009    }
2010
2011    // Test registration
2012    const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013    const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014    const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2015    for (i=0; i<3; ++i) {
2016        Transliterator *t = new TestTrans(IDS[i]);
2017        if (t == 0) {
2018            errln("FAIL: out of memory");
2019            return;
2020        }
2021        if (t->getID() != IDS[i]) {
2022            errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2023            delete t;
2024            return;
2025        }
2026        Transliterator::registerInstance(t);
2027        UErrorCode status = U_ZERO_ERROR;
2028        t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2029        if (t == NULL) {
2030            errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2031                  IDS[i]);
2032        } else {
2033            logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2034                  IDS[i]);
2035            delete t;
2036        }
2037        Transliterator::unregister(IDS[i]);
2038        t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2039        if (t != NULL) {
2040            errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2041                  IDS[i]);
2042            delete t;
2043        }
2044    }
2045
2046    // Make sure getAvailable API reflects removal
2047    int32_t n = Transliterator::countAvailableIDs();
2048    for (i=0; i<n; ++i) {
2049        UnicodeString id = Transliterator::getAvailableID(i);
2050        for (j=0; j<3; ++j) {
2051            if (id.caseCompare(FULL_IDS[j],0)==0) {
2052                errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2053            }
2054        }
2055    }
2056    n = Transliterator::countAvailableTargets("Any");
2057    for (i=0; i<n; ++i) {
2058        UnicodeString t;
2059        Transliterator::getAvailableTarget(i, "Any", t);
2060        if (t.caseCompare(IDS[0],0)==0) {
2061            errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2062        }
2063    }
2064    n = Transliterator::countAvailableSources();
2065    for (i=0; i<n; ++i) {
2066        UnicodeString s;
2067        Transliterator::getAvailableSource(i, s);
2068        for (j=0; j<3; ++j) {
2069            if (SOURCES[j] == NULL) continue;
2070            if (s.caseCompare(SOURCES[j],0)==0) {
2071                errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2072            }
2073        }
2074    }
2075}
2076
2077/**
2078 * Test inverse of Greek-Latin; Title()
2079 */
2080void TransliteratorTest::TestCompoundInverse(void) {
2081    UParseError parseError;
2082    UErrorCode status = U_ZERO_ERROR;
2083    Transliterator *t = Transliterator::createInstance
2084        ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2085    if (t == 0) {
2086        dataerrln("FAIL: createInstance - %s", u_errorName(status));
2087        return;
2088    }
2089    UnicodeString exp("(Title);Latin-Greek");
2090    if (t->getID() == exp) {
2091        logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2092              t->getID());
2093    } else {
2094        errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2095              t->getID() + "\", expected \"" + exp + "\"");
2096    }
2097    delete t;
2098}
2099
2100/**
2101 * Test NFD chaining with RBT
2102 */
2103void TransliteratorTest::TestNFDChainRBT() {
2104    UParseError pe;
2105    UErrorCode ec = U_ZERO_ERROR;
2106    Transliterator* t = Transliterator::createFromRules(
2107                               "TEST", "::NFD; aa > Q; a > q;",
2108                               UTRANS_FORWARD, pe, ec);
2109    if (t == NULL || U_FAILURE(ec)) {
2110        dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2111        return;
2112    }
2113    expect(*t, "aa", "Q");
2114    delete t;
2115
2116    // TEMPORARY TESTS -- BEING DEBUGGED
2117//=-    UnicodeString s, s2;
2118//=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2119//=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120//=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2121//=-    expect(*t, s, s2);
2122//=-    delete t;
2123//=-
2124//=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2125//=-    expect(*t, s2, s);
2126//=-    delete t;
2127//=-
2128//=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2129//=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2130//=-    expect(*t, s, s);
2131//=-    delete t;
2132
2133//    const char* source[] = {
2134//        /*
2135//        "\\u015Br\\u012Bmad",
2136//        "bhagavadg\\u012Bt\\u0101",
2137//        "adhy\\u0101ya",
2138//        "arjuna",
2139//        "vi\\u1E63\\u0101da",
2140//        "y\\u014Dga",
2141//        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2142//        "uv\\u0101cr\\u0325",
2143//        */
2144//        "rmk\\u1E63\\u0113t",
2145//      //"dharmak\\u1E63\\u0113tr\\u0113",
2146//        /*
2147//        "kuruk\\u1E63\\u0113tr\\u0113",
2148//        "samav\\u0113t\\u0101",
2149//        "yuyutsava-\\u1E25",
2150//        "m\\u0101mak\\u0101-\\u1E25",
2151//     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2152//        "kimakurvata",
2153//        "san\\u0304java",
2154//        */
2155//
2156//        0
2157//    };
2158//    const char* expected[] = {
2159//        /*
2160//        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2161//        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2162//        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2163//        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2164//        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2165//        "\\u092f\\u094b\\u0917",
2166//        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2167//        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2168//        */
2169//        "\\u0927",
2170//        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171//        /*
2172//        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173//        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2174//        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2175//        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2176//    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2177//        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2178//        "\\u0938\\u0902\\u091c\\u0935",
2179//        */
2180//        0
2181//    };
2182//    UErrorCode status = U_ZERO_ERROR;
2183//    UParseError parseError;
2184//    UnicodeString message;
2185//    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2186//    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2187//    if(U_FAILURE(status)){
2188//        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2189//        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2190//        delete latinToDevToLatin;
2191//        delete devToLatinToDev;
2192//        return;
2193//    }
2194//    UnicodeString gotResult;
2195//    for(int i= 0; source[i] != 0; i++){
2196//        gotResult = source[i];
2197//        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2198//        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2199//    }
2200//    delete latinToDevToLatin;
2201//    delete devToLatinToDev;
2202}
2203
2204/**
2205 * Inverse of "Null" should be "Null". (J21)
2206 */
2207void TransliteratorTest::TestNullInverse() {
2208    UParseError pe;
2209    UErrorCode ec = U_ZERO_ERROR;
2210    Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2211    if (t == 0 || U_FAILURE(ec)) {
2212        errln("FAIL: createInstance");
2213        return;
2214    }
2215    Transliterator *u = t->createInverse(ec);
2216    if (u == 0 || U_FAILURE(ec)) {
2217        errln("FAIL: createInverse");
2218        delete t;
2219        return;
2220    }
2221    if (u->getID() != "Null") {
2222        errln("FAIL: Inverse of Null should be Null");
2223    }
2224    delete t;
2225    delete u;
2226}
2227
2228/**
2229 * Check ID of inverse of alias. (J22)
2230 */
2231void TransliteratorTest::TestAliasInverseID() {
2232    UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2233    UParseError pe;
2234    UErrorCode ec = U_ZERO_ERROR;
2235    Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2236    if (t == 0 || U_FAILURE(ec)) {
2237        dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2238        return;
2239    }
2240    Transliterator *u = t->createInverse(ec);
2241    if (u == 0 || U_FAILURE(ec)) {
2242        errln("FAIL: createInverse");
2243        delete t;
2244        return;
2245    }
2246    UnicodeString exp = "Hangul-Latin";
2247    UnicodeString got = u->getID();
2248    if (got != exp) {
2249        errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2250              ", expected " + exp);
2251    }
2252    delete t;
2253    delete u;
2254}
2255
2256/**
2257 * Test IDs of inverses of compound transliterators. (J20)
2258 */
2259void TransliteratorTest::TestCompoundInverseID() {
2260    UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2261    UParseError pe;
2262    UErrorCode ec = U_ZERO_ERROR;
2263    Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2264    if (t == 0 || U_FAILURE(ec)) {
2265        dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2266        return;
2267    }
2268    Transliterator *u = t->createInverse(ec);
2269    if (u == 0 || U_FAILURE(ec)) {
2270        errln("FAIL: createInverse");
2271        delete t;
2272        return;
2273    }
2274    UnicodeString exp = "NFD(NFC);Jamo-Latin";
2275    UnicodeString got = u->getID();
2276    if (got != exp) {
2277        errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2278              ", expected " + exp);
2279    }
2280    delete t;
2281    delete u;
2282}
2283
2284/**
2285 * Test undefined variable.
2286
2287 */
2288void TransliteratorTest::TestUndefinedVariable() {
2289    UnicodeString rule = "$initial } a <> \\u1161;";
2290    UParseError pe;
2291    UErrorCode ec = U_ZERO_ERROR;
2292    Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2293    delete t;
2294    if (U_FAILURE(ec)) {
2295        logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2296              u_errorName(ec));
2297        return;
2298    }
2299    errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2300          u_errorName(ec));
2301}
2302
2303/**
2304 * Test empty context.
2305 */
2306void TransliteratorTest::TestEmptyContext() {
2307    expect(" { a } > b;", "xay a ", "xby b ");
2308}
2309
2310/**
2311* Test compound filter ID syntax
2312*/
2313void TransliteratorTest::TestCompoundFilterID(void) {
2314    static const char* DATA[] = {
2315        // Col. 1 = ID or rule set (latter must start with #)
2316
2317        // = columns > 1 are null if expect col. 1 to be illegal =
2318
2319        // Col. 2 = direction, "F..." or "R..."
2320        // Col. 3 = source string
2321        // Col. 4 = exp result
2322
2323        "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2324        "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2325        "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2326        "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327        "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2328        "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2329        NULL,
2330    };
2331
2332    for (int32_t i=0; DATA[i]; i+=4) {
2333        UnicodeString id = CharsToUnicodeString(DATA[i]);
2334        UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2335            UTRANS_REVERSE : UTRANS_FORWARD;
2336        UnicodeString source;
2337        UnicodeString exp;
2338        if (DATA[i+2] != NULL) {
2339            source = CharsToUnicodeString(DATA[i+2]);
2340            exp = CharsToUnicodeString(DATA[i+3]);
2341        }
2342        UBool expOk = (DATA[i+1] != NULL);
2343        Transliterator* t = NULL;
2344        UParseError pe;
2345        UErrorCode ec = U_ZERO_ERROR;
2346        if (id.charAt(0) == 0x23/*#*/) {
2347            t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2348        } else {
2349            t = Transliterator::createInstance(id, direction, pe, ec);
2350        }
2351        UBool ok = (t != NULL && U_SUCCESS(ec));
2352        UnicodeString transID;
2353        if (t!=0) {
2354            transID = t->getID();
2355        }
2356        else {
2357            transID = UnicodeString("NULL", "");
2358        }
2359        if (ok == expOk) {
2360            logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2361                  u_errorName(ec));
2362            if (source.length() != 0) {
2363                expect(*t, source, exp);
2364            }
2365            delete t;
2366        } else {
2367            dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2368                  u_errorName(ec));
2369        }
2370    }
2371}
2372
2373/**
2374 * Test new property set syntax
2375 */
2376void TransliteratorTest::TestPropertySet() {
2377    expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378    expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379           "[ a stitch ]\n[ in time ]\r[ saves 9]");
2380}
2381
2382/**
2383 * Test various failure points of the new 2.0 engine.
2384 */
2385void TransliteratorTest::TestNewEngine() {
2386    UParseError pe;
2387    UErrorCode ec = U_ZERO_ERROR;
2388    Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2389    if (t == 0 || U_FAILURE(ec)) {
2390        dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2391        return;
2392    }
2393    // Katakana should be untouched
2394    expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2395           CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2396
2397    delete t;
2398
2399#if 1
2400    // This test will only work if Transliterator.ROLLBACK is
2401    // true.  Otherwise, this test will fail, revealing a
2402    // limitation of global filters in incremental mode.
2403    Transliterator *a =
2404        Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2405    Transliterator *A =
2406        Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2407    if (U_FAILURE(ec)) {
2408        delete a;
2409        delete A;
2410        return;
2411    }
2412
2413    Transliterator* array[3];
2414    array[0] = a;
2415    array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2416    array[2] = A;
2417    if (U_FAILURE(ec)) {
2418        errln("FAIL: createInstance NFD");
2419        delete a;
2420        delete A;
2421        delete array[1];
2422        return;
2423    }
2424
2425    t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2426    if (U_FAILURE(ec)) {
2427        errln("FAIL: UnicodeSet constructor");
2428        delete a;
2429        delete A;
2430        delete array[1];
2431        delete t;
2432        return;
2433    }
2434
2435    expect(*t, "aAaA", "bAbA");
2436
2437    assertTrue("countElements", t->countElements() == 3);
2438    assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2439    assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2440    assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2441    assertSuccess("getElement", ec);
2442
2443    delete a;
2444    delete A;
2445    delete array[1];
2446    delete t;
2447#endif
2448
2449    expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450           "a",
2451           "ax");
2452
2453    UnicodeString gr = CharsToUnicodeString(
2454        "$ddot = \\u0308 ;"
2455        "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456        "$rough = \\u0314 ;"
2457        "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458        "\\u03b1 <> a ;"
2459        "$rough <> h ;");
2460
2461    expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462}
2463
2464/**
2465 * Test quantified segment behavior.  We want:
2466 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2467 */
2468void TransliteratorTest::TestQuantifiedSegment(void) {
2469    // The normal case
2470    expect("([abc]+) > x $1 x;", "cba", "xcbax");
2471
2472    // The tricky case; the quantifier is around the segment
2473    expect("([abc])+ > x $1 x;", "cba", "xax");
2474
2475    // Tricky case in reverse direction
2476    expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2477
2478    // Check post-context segment
2479    expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2480
2481    // Test toRule/toPattern for non-quantified segment.
2482    // Careful with spacing here.
2483    UnicodeString r("([a-c]){q} > x $1 x;");
2484    UParseError pe;
2485    UErrorCode ec = U_ZERO_ERROR;
2486    Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2487    if (U_FAILURE(ec)) {
2488        errln("FAIL: createFromRules");
2489        delete t;
2490        return;
2491    }
2492    UnicodeString rr;
2493    t->toRules(rr, TRUE);
2494    if (r != rr) {
2495        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496    } else {
2497        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2498    }
2499    delete t;
2500
2501    // Test toRule/toPattern for quantified segment.
2502    // Careful with spacing here.
2503    r = "([a-c])+{q} > x $1 x;";
2504    t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2505    if (U_FAILURE(ec)) {
2506        errln("FAIL: createFromRules");
2507        delete t;
2508        return;
2509    }
2510    t->toRules(rr, TRUE);
2511    if (r != rr) {
2512        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513    } else {
2514        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2515    }
2516    delete t;
2517}
2518
2519//======================================================================
2520// Ram's tests
2521//======================================================================
2522void TransliteratorTest::TestDevanagariLatinRT(){
2523    const int MAX_LEN= 52;
2524    const char* const source[MAX_LEN] = {
2525        "bh\\u0101rata",
2526        "kra",
2527        "k\\u1E63a",
2528        "khra",
2529        "gra",
2530        "\\u1E45ra",
2531        "cra",
2532        "chra",
2533        "j\\u00F1a",
2534        "jhra",
2535        "\\u00F1ra",
2536        "\\u1E6Dya",
2537        "\\u1E6Dhra",
2538        "\\u1E0Dya",
2539      //"r\\u0323ya", // \u095c is not valid in Devanagari
2540        "\\u1E0Dhya",
2541        "\\u1E5Bhra",
2542        "\\u1E47ra",
2543        "tta",
2544        "thra",
2545        "dda",
2546        "dhra",
2547        "nna",
2548        "pra",
2549        "phra",
2550        "bra",
2551        "bhra",
2552        "mra",
2553        "\\u1E49ra",
2554      //"l\\u0331ra",
2555        "yra",
2556        "\\u1E8Fra",
2557      //"l-",
2558        "vra",
2559        "\\u015Bra",
2560        "\\u1E63ra",
2561        "sra",
2562        "hma",
2563        "\\u1E6D\\u1E6Da",
2564        "\\u1E6D\\u1E6Dha",
2565        "\\u1E6Dh\\u1E6Dha",
2566        "\\u1E0D\\u1E0Da",
2567        "\\u1E0D\\u1E0Dha",
2568        "\\u1E6Dya",
2569        "\\u1E6Dhya",
2570        "\\u1E0Dya",
2571        "\\u1E0Dhya",
2572        // Not roundtrippable --
2573        // \\u0939\\u094d\\u094d\\u092E  - hma
2574        // \\u0939\\u094d\\u092E         - hma
2575        // CharsToUnicodeString("hma"),
2576        "hya",
2577        "\\u015Br\\u0325",
2578        "\\u015Bca",
2579        "\\u0115",
2580        "san\\u0304j\\u012Bb s\\u0113nagupta",
2581        "\\u0101nand vaddir\\u0101ju",
2582        "\\u0101",
2583        "a"
2584    };
2585    const char* const expected[MAX_LEN] = {
2586        "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2587        "\\u0915\\u094D\\u0930",          /* kra         */
2588        "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2589        "\\u0916\\u094D\\u0930",          /* khra        */
2590        "\\u0917\\u094D\\u0930",          /* gra         */
2591        "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2592        "\\u091A\\u094D\\u0930",          /* cra         */
2593        "\\u091B\\u094D\\u0930",          /* chra        */
2594        "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2595        "\\u091D\\u094D\\u0930",          /* jhra        */
2596        "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2597        "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2598        "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2599        "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2600      //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2601        "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2602        "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2603        "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2604        "\\u0924\\u094D\\u0924",          /* tta         */
2605        "\\u0925\\u094D\\u0930",          /* thra        */
2606        "\\u0926\\u094D\\u0926",          /* dda         */
2607        "\\u0927\\u094D\\u0930",          /* dhra        */
2608        "\\u0928\\u094D\\u0928",          /* nna         */
2609        "\\u092A\\u094D\\u0930",          /* pra         */
2610        "\\u092B\\u094D\\u0930",          /* phra        */
2611        "\\u092C\\u094D\\u0930",          /* bra         */
2612        "\\u092D\\u094D\\u0930",          /* bhra        */
2613        "\\u092E\\u094D\\u0930",          /* mra         */
2614        "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2615      //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2616        "\\u092F\\u094D\\u0930",          /* yra         */
2617        "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2618      //"l-",
2619        "\\u0935\\u094D\\u0930",          /* vra         */
2620        "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2621        "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2622        "\\u0938\\u094D\\u0930",          /* sra         */
2623        "\\u0939\\u094d\\u092E",          /* hma         */
2624        "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2625        "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2626        "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2627        "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2628        "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2629        "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2630        "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2631        "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2632        "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2633     // "hma",                         /* hma         */
2634        "\\u0939\\u094D\\u092F",          /* hya         */
2635        "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2636        "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2637        "\\u090d",                        /* e\\u0306    */
2638        "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639        "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640        "\\u0906",
2641        "\\u0905",
2642    };
2643    UErrorCode status = U_ZERO_ERROR;
2644    UParseError parseError;
2645    UnicodeString message;
2646    Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2647    Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2648    if(U_FAILURE(status)){
2649        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2650        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2651        return;
2652    }
2653    UnicodeString gotResult;
2654    for(int i= 0; i<MAX_LEN; i++){
2655        gotResult = source[i];
2656        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2657        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2658    }
2659    delete latinToDev;
2660    delete devToLatin;
2661}
2662
2663void TransliteratorTest::TestTeluguLatinRT(){
2664    const int MAX_LEN=10;
2665    const char* const source[MAX_LEN] = {
2666        "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2667        "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2668        "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2669        "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2670        "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2671        "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2672        "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2673        "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2674        "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2675        "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2676    };
2677
2678    const char* const expected[MAX_LEN] = {
2679        "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680        "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681        "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682        "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683        "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684        "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685        "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686        "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687        "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688        "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689    };
2690
2691    UErrorCode status = U_ZERO_ERROR;
2692    UParseError parseError;
2693    UnicodeString message;
2694    Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2695    Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2696    if(U_FAILURE(status)){
2697        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2698        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2699        return;
2700    }
2701    UnicodeString gotResult;
2702    for(int i= 0; i<MAX_LEN; i++){
2703        gotResult = source[i];
2704        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2705        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2706    }
2707    delete latinToDev;
2708    delete devToLatin;
2709}
2710
2711void TransliteratorTest::TestSanskritLatinRT(){
2712    const int MAX_LEN =16;
2713    const char* const source[MAX_LEN] = {
2714        "rmk\\u1E63\\u0113t",
2715        "\\u015Br\\u012Bmad",
2716        "bhagavadg\\u012Bt\\u0101",
2717        "adhy\\u0101ya",
2718        "arjuna",
2719        "vi\\u1E63\\u0101da",
2720        "y\\u014Dga",
2721        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722        "uv\\u0101cr\\u0325",
2723        "dharmak\\u1E63\\u0113tr\\u0113",
2724        "kuruk\\u1E63\\u0113tr\\u0113",
2725        "samav\\u0113t\\u0101",
2726        "yuyutsava\\u1E25",
2727        "m\\u0101mak\\u0101\\u1E25",
2728    // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729        "kimakurvata",
2730        "san\\u0304java",
2731    };
2732    const char* const expected[MAX_LEN] = {
2733        "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739        "\\u092f\\u094b\\u0917",
2740        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742        "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747    //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749        "\\u0938\\u0902\\u091c\\u0935",
2750    };
2751    UErrorCode status = U_ZERO_ERROR;
2752    UParseError parseError;
2753    UnicodeString message;
2754    Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2755    Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2756    if(U_FAILURE(status)){
2757        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2758        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2759        return;
2760    }
2761    UnicodeString gotResult;
2762    for(int i= 0; i<MAX_LEN; i++){
2763        gotResult = source[i];
2764        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2765        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2766    }
2767    delete latinToDev;
2768    delete devToLatin;
2769}
2770
2771
2772void TransliteratorTest::TestCompoundLatinRT(){
2773    const char* const source[] = {
2774        "rmk\\u1E63\\u0113t",
2775        "\\u015Br\\u012Bmad",
2776        "bhagavadg\\u012Bt\\u0101",
2777        "adhy\\u0101ya",
2778        "arjuna",
2779        "vi\\u1E63\\u0101da",
2780        "y\\u014Dga",
2781        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782        "uv\\u0101cr\\u0325",
2783        "dharmak\\u1E63\\u0113tr\\u0113",
2784        "kuruk\\u1E63\\u0113tr\\u0113",
2785        "samav\\u0113t\\u0101",
2786        "yuyutsava\\u1E25",
2787        "m\\u0101mak\\u0101\\u1E25",
2788     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789        "kimakurvata",
2790        "san\\u0304java"
2791    };
2792    const int MAX_LEN = UPRV_LENGTHOF(source);
2793    const char* const expected[MAX_LEN] = {
2794        "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800        "\\u092f\\u094b\\u0917",
2801        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803        "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810        "\\u0938\\u0902\\u091c\\u0935"
2811    };
2812    if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2813        errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814        return;
2815    }
2816
2817    UErrorCode status = U_ZERO_ERROR;
2818    UParseError parseError;
2819    UnicodeString message;
2820    Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2821    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2822    Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2823    Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2824
2825    if(U_FAILURE(status)){
2826        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2827        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2828        return;
2829    }
2830    UnicodeString gotResult;
2831    for(int i= 0; i<MAX_LEN; i++){
2832        gotResult = source[i];
2833        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2834        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835        expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2836
2837    }
2838    delete(latinToDevToLatin);
2839    delete(devToLatinToDev);
2840    delete(devToTelToDev);
2841    delete(latinToTelToLatin);
2842}
2843
2844/**
2845 * Test Gurmukhi-Devanagari Tippi and Bindi
2846 */
2847void TransliteratorTest::TestGurmukhiDevanagari(){
2848    // the rule says:
2849    // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2850    // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2851    UErrorCode status = U_ZERO_ERROR;
2852    UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2853    UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2854    UParseError parseError;
2855
2856    UnicodeSetIterator vIter(vowel);
2857    UnicodeSetIterator nvIter(non_vowel);
2858    Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2859    if(U_FAILURE(status)) {
2860      dataerrln("Error creating transliterator %s", u_errorName(status));
2861      delete trans;
2862      return;
2863    }
2864    UnicodeString src (" \\u0902", -1, US_INV);
2865    UnicodeString expected(" \\u0A02", -1, US_INV);
2866    src = src.unescape();
2867    expected= expected.unescape();
2868
2869    while(vIter.next()){
2870        src.setCharAt(0,(UChar) vIter.getCodepoint());
2871        expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2872        expect(*trans,src,expected);
2873    }
2874
2875    expected.setCharAt(1,0x0A70);
2876    while(nvIter.next()){
2877        //src.setCharAt(0,(char) nvIter.codepoint);
2878        src.setCharAt(0,(UChar)nvIter.getCodepoint());
2879        expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2880        expect(*trans,src,expected);
2881    }
2882    delete trans;
2883}
2884/**
2885 * Test instantiation from a locale.
2886 */
2887void TransliteratorTest::TestLocaleInstantiation(void) {
2888    UParseError pe;
2889    UErrorCode ec = U_ZERO_ERROR;
2890    Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2891    if (U_FAILURE(ec)) {
2892        dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2893        delete t;
2894        return;
2895    }
2896    expect(*t, CharsToUnicodeString("\\u0430"), "a");
2897    delete t;
2898
2899    t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2900    if (U_FAILURE(ec)) {
2901        errln("FAIL: createInstance(en-el)");
2902        delete t;
2903        return;
2904    }
2905    expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2906    delete t;
2907}
2908
2909/**
2910 * Test title case handling of accent (should ignore accents)
2911 */
2912void TransliteratorTest::TestTitleAccents(void) {
2913    UParseError pe;
2914    UErrorCode ec = U_ZERO_ERROR;
2915    Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2916    if (U_FAILURE(ec)) {
2917        errln("FAIL: createInstance(Title)");
2918        delete t;
2919        return;
2920    }
2921    expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2922    delete t;
2923}
2924
2925/**
2926 * Basic test of a locale resource based rule.
2927 */
2928void TransliteratorTest::TestLocaleResource() {
2929    const char* DATA[] = {
2930        // id                    from               to
2931        //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2932        "Latin-el",              "b",               "\\u03bc\\u03c0",
2933        "Latin-Greek",           "b",               "\\u03B2",
2934        "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2935        "el-Latin",              "\\u03B2",         "v",
2936        "Greek-Latin",           "\\u03B2",         "b",
2937    };
2938    const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2939    for (int32_t i=0; i<DATA_length; i+=3) {
2940        UParseError pe;
2941        UErrorCode ec = U_ZERO_ERROR;
2942        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2943        if (U_FAILURE(ec)) {
2944            dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2945            delete t;
2946            continue;
2947        }
2948        expect(*t, CharsToUnicodeString(DATA[i+1]),
2949               CharsToUnicodeString(DATA[i+2]));
2950        delete t;
2951    }
2952}
2953
2954/**
2955 * Make sure parse errors reference the right line.
2956 */
2957void TransliteratorTest::TestParseError() {
2958    static const char* rule =
2959        "a > b;\n"
2960        "# more stuff\n"
2961        "d << b;";
2962    UErrorCode ec = U_ZERO_ERROR;
2963    UParseError pe;
2964    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2965    delete t;
2966    if (U_FAILURE(ec)) {
2967        UnicodeString err(pe.preContext);
2968        err.append((UChar)124/*|*/).append(pe.postContext);
2969        if (err.indexOf("d << b") >= 0) {
2970            logln("Ok: " + err);
2971        } else {
2972            errln("FAIL: " + err);
2973        }
2974    }
2975    else {
2976        errln("FAIL: no syntax error");
2977    }
2978    static const char* maskingRule =
2979        "a>x;\n"
2980        "# more stuff\n"
2981        "ab>y;";
2982    ec = U_ZERO_ERROR;
2983    delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2984    if (ec != U_RULE_MASK_ERROR) {
2985        errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2986    }
2987    else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2988        errln("FAIL: did not get expected precontext");
2989    }
2990    else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2991        errln("FAIL: did not get expected postcontext");
2992    }
2993}
2994
2995/**
2996 * Make sure sets on output are disallowed.
2997 */
2998void TransliteratorTest::TestOutputSet() {
2999    UnicodeString rule = "$set = [a-cm-n]; b > $set;";
3000    UErrorCode ec = U_ZERO_ERROR;
3001    UParseError pe;
3002    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3003    delete t;
3004    if (U_FAILURE(ec)) {
3005        UnicodeString err(pe.preContext);
3006        err.append((UChar)124/*|*/).append(pe.postContext);
3007        logln("Ok: " + err);
3008        return;
3009    }
3010    errln("FAIL: No syntax error");
3011}
3012
3013/**
3014 * Test the use variable range pragma, making sure that use of
3015 * variable range characters is detected and flagged as an error.
3016 */
3017void TransliteratorTest::TestVariableRange() {
3018    UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019    UErrorCode ec = U_ZERO_ERROR;
3020    UParseError pe;
3021    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3022    delete t;
3023    if (U_FAILURE(ec)) {
3024        UnicodeString err(pe.preContext);
3025        err.append((UChar)124/*|*/).append(pe.postContext);
3026        logln("Ok: " + err);
3027        return;
3028    }
3029    errln("FAIL: No syntax error");
3030}
3031
3032/**
3033 * Test invalid post context error handling
3034 */
3035void TransliteratorTest::TestInvalidPostContext() {
3036    UnicodeString rule = "a}b{c>d;";
3037    UErrorCode ec = U_ZERO_ERROR;
3038    UParseError pe;
3039    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3040    delete t;
3041    if (U_FAILURE(ec)) {
3042        UnicodeString err(pe.preContext);
3043        err.append((UChar)124/*|*/).append(pe.postContext);
3044        if (err.indexOf("a}b{c") >= 0) {
3045            logln("Ok: " + err);
3046        } else {
3047            errln("FAIL: " + err);
3048        }
3049        return;
3050    }
3051    errln("FAIL: No syntax error");
3052}
3053
3054/**
3055 * Test ID form variants
3056 */
3057void TransliteratorTest::TestIDForms() {
3058    const char* DATA[] = {
3059        "NFC", NULL, "NFD",
3060        "nfd", NULL, "NFC", // make sure case is ignored
3061        "Any-NFKD", NULL, "Any-NFKC",
3062        "Null", NULL, "Null",
3063        "-nfkc", "nfkc", "NFKD",
3064        "-nfkc/", "nfkc", "NFKD",
3065        "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3066        "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067        "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068        "Source-", NULL, NULL,
3069        "Source/Variant-", NULL, NULL,
3070        "Source-/Variant", NULL, NULL,
3071        "/Variant", NULL, NULL,
3072        "/Variant-", NULL, NULL,
3073        "-/Variant", NULL, NULL,
3074        "-/", NULL, NULL,
3075        "-", NULL, NULL,
3076        "/", NULL, NULL,
3077    };
3078    const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3079
3080    for (int32_t i=0; i<DATA_length; i+=3) {
3081        const char* ID = DATA[i];
3082        const char* expID = DATA[i+1];
3083        const char* expInvID = DATA[i+2];
3084        UBool expValid = (expInvID != NULL);
3085        if (expID == NULL) {
3086            expID = ID;
3087        }
3088        UParseError pe;
3089        UErrorCode ec = U_ZERO_ERROR;
3090        Transliterator *t =
3091            Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3092        if (U_FAILURE(ec)) {
3093            if (!expValid) {
3094                logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3095            } else {
3096                dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3097            }
3098            delete t;
3099            continue;
3100        }
3101        Transliterator *u = t->createInverse(ec);
3102        if (U_FAILURE(ec)) {
3103            errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3104            delete t;
3105            delete u;
3106            continue;
3107        }
3108        if (t->getID() == expID &&
3109            u->getID() == expInvID) {
3110            logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3111        } else {
3112            errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3113                  t->getID() + " x getInverse() => " + u->getID() +
3114                  ", expected " + expInvID);
3115        }
3116        delete t;
3117        delete u;
3118    }
3119}
3120
3121static const UChar SPACE[]   = {32,0};
3122static const UChar NEWLINE[] = {10,0};
3123static const UChar RETURN[]  = {13,0};
3124static const UChar EMPTY[]   = {0};
3125
3126void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3127                                    const UnicodeString& testRulesForward) {
3128    UnicodeString rules2; t2.toRules(rules2, TRUE);
3129    //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130    rules2.findAndReplace(SPACE, EMPTY);
3131    rules2.findAndReplace(NEWLINE, EMPTY);
3132    rules2.findAndReplace(RETURN, EMPTY);
3133
3134    UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3135
3136    if (rules2 != testRules) {
3137        errln(label);
3138        logln((UnicodeString)"GENERATED RULES: " + rules2);
3139        logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3140    }
3141}
3142
3143/**
3144 * Mark's toRules test.
3145 */
3146void TransliteratorTest::TestToRulesMark() {
3147    const char* testRules =
3148        "::[[:Latin:][:Mark:]];"
3149        "::NFKD (NFC);"
3150        "::Lower (Lower);"
3151        "a <> \\u03B1;" // alpha
3152        "::NFKC (NFD);"
3153        "::Upper (Lower);"
3154        "::Lower ();"
3155        "::([[:Greek:][:Mark:]]);"
3156        ;
3157    const char* testRulesForward =
3158        "::[[:Latin:][:Mark:]];"
3159        "::NFKD(NFC);"
3160        "::Lower(Lower);"
3161        "a > \\u03B1;"
3162        "::NFKC(NFD);"
3163        "::Upper (Lower);"
3164        "::Lower ();"
3165        ;
3166    const char* testRulesBackward =
3167        "::[[:Greek:][:Mark:]];"
3168        "::Lower (Upper);"
3169        "::NFD(NFKC);"
3170        "\\u03B1 > a;"
3171        "::Lower(Lower);"
3172        "::NFC(NFKD);"
3173        ;
3174    UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3175    UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3176
3177    UParseError pe;
3178    UErrorCode ec = U_ZERO_ERROR;
3179    Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3180    Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3181
3182    if (U_FAILURE(ec)) {
3183        delete t2;
3184        delete t3;
3185        dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3186        return;
3187    }
3188
3189    expect(*t2, source, target);
3190    expect(*t3, target, source);
3191
3192    checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3193    checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3194
3195    delete t2;
3196    delete t3;
3197}
3198
3199/**
3200 * Test Escape and Unescape transliterators.
3201 */
3202void TransliteratorTest::TestEscape() {
3203    UParseError pe;
3204    UErrorCode ec;
3205    Transliterator *t;
3206
3207    ec = U_ZERO_ERROR;
3208    t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3209    if (U_FAILURE(ec)) {
3210        errln((UnicodeString)"FAIL: createInstance");
3211    } else {
3212        expect(*t,
3213               UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3214               "@12Q");
3215    }
3216    delete t;
3217
3218    ec = U_ZERO_ERROR;
3219    t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3220    if (U_FAILURE(ec)) {
3221        errln((UnicodeString)"FAIL: createInstance");
3222    } else {
3223        expect(*t,
3224               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3225               UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3226    }
3227    delete t;
3228
3229    ec = U_ZERO_ERROR;
3230    t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3231    if (U_FAILURE(ec)) {
3232        errln((UnicodeString)"FAIL: createInstance");
3233    } else {
3234        expect(*t,
3235               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3236               UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3237    }
3238    delete t;
3239
3240    ec = U_ZERO_ERROR;
3241    t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3242    if (U_FAILURE(ec)) {
3243        errln((UnicodeString)"FAIL: createInstance");
3244    } else {
3245        expect(*t,
3246               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3247               UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3248    }
3249    delete t;
3250}
3251
3252
3253void TransliteratorTest::TestAnchorMasking(){
3254    UnicodeString rule ("^a > Q; a > q;");
3255    UErrorCode status= U_ZERO_ERROR;
3256    UParseError parseError;
3257
3258    Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3259    if(U_FAILURE(status)){
3260        errln(UnicodeString("FAIL: ") + "ID" +
3261              ".createFromRules() => bad rules" +
3262              /*", parse error " + parseError.code +*/
3263              ", line " + parseError.line +
3264              ", offset " + parseError.offset +
3265              ", context " + prettify(parseError.preContext, TRUE) +
3266              ", rules: " + prettify(rule, TRUE));
3267    }
3268    delete t;
3269}
3270
3271/**
3272 * Make sure display names of variants look reasonable.
3273 */
3274void TransliteratorTest::TestDisplayName() {
3275#if UCONFIG_NO_FORMATTING
3276    logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3277    return;
3278#else
3279    static const char* DATA[] = {
3280        // ID, forward name, reverse name
3281        // Update the text as necessary -- the important thing is
3282        // not the text itself, but how various cases are handled.
3283
3284        // Basic test
3285        "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3286
3287        // Variants
3288        "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3289
3290        // Target-only IDs
3291        "NFC", "Any to NFC", "Any to NFD",
3292    };
3293
3294    int32_t DATA_length = UPRV_LENGTHOF(DATA);
3295
3296    Locale US("en", "US");
3297
3298    for (int32_t i=0; i<DATA_length; i+=3) {
3299        UnicodeString name;
3300        Transliterator::getDisplayName(DATA[i], US, name);
3301        if (name != DATA[i+1]) {
3302            dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3303                  name + ", expected " + DATA[i+1]);
3304        } else {
3305            logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3306        }
3307        UErrorCode ec = U_ZERO_ERROR;
3308        UParseError pe;
3309        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3310        if (U_FAILURE(ec)) {
3311            delete t;
3312            dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3313            continue;
3314        }
3315        name = Transliterator::getDisplayName(t->getID(), US, name);
3316        if (name != DATA[i+2]) {
3317            dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3318                  name + ", expected " + DATA[i+2]);
3319        } else {
3320            logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3321        }
3322        delete t;
3323    }
3324#endif
3325}
3326
3327void TransliteratorTest::TestSpecialCases(void) {
3328    const UnicodeString registerRules[] = {
3329        "Any-Dev1", "x > X; y > Y;",
3330        "Any-Dev2", "XY > Z",
3331        "Greek-Latin/FAKE",
3332            CharsToUnicodeString
3333            ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3334        "" // END MARKER
3335    };
3336
3337    const UnicodeString testCases[] = {
3338        // NORMALIZATION
3339        // should add more test cases
3340        "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341        "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342        "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343        "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3344
3345        // mp -> b BUG
3346        "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347        "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3348
3349        // check for devanagari bug
3350        "nfd;Dev1;Dev2;nfc", "xy", "Z",
3351
3352        // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3353        "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354                 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3355
3356        //TODO: enable this test once Titlecase works right
3357        /*
3358        "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359                 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3360                 */
3361        "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362                 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3363        "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3364                 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3365
3366        "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3367        "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3368
3369         // FORMS OF S
3370        "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3371                               CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3372        "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3373                               CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3374        "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375                        CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3376        "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3377                        CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3378        // Tatiana bug
3379        // Upper: TAT\\u02B9\\u00C2NA
3380        // Lower: tat\\u02B9\\u00E2na
3381        // Title: Tat\\u02B9\\u00E2na
3382        "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383                 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384        "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3385                 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386        "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3387                 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3388
3389        "" // END MARKER
3390    };
3391
3392    UParseError pos;
3393    int32_t i;
3394    for (i = 0; registerRules[i].length()!=0; i+=2) {
3395        UErrorCode status = U_ZERO_ERROR;
3396
3397        Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3398            registerRules[i+1], UTRANS_FORWARD, pos, status);
3399        if (U_FAILURE(status)) {
3400            dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3401        } else {
3402            Transliterator::registerInstance(t);
3403        }
3404    }
3405    for (i = 0; testCases[i].length()!=0; i+=3) {
3406        UErrorCode ec = U_ZERO_ERROR;
3407        UParseError pe;
3408        const UnicodeString& name = testCases[i];
3409        Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3410        if (U_FAILURE(ec)) {
3411            dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3412            delete t;
3413            continue;
3414        }
3415        const UnicodeString& id = t->getID();
3416        const UnicodeString& source = testCases[i+1];
3417        UnicodeString target;
3418
3419        // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3420
3421        if (testCases[i+2].length() > 0) {
3422            target = testCases[i+2];
3423        } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3424            Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3425        } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3426            Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3427        } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3428            Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3429        } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3430            Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3431        } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3432            target = source;
3433            target.toLower(Locale::getUS());
3434        } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3435            target = source;
3436            target.toUpper(Locale::getUS());
3437        }
3438        if (U_FAILURE(ec)) {
3439            errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3440            continue;
3441        }
3442
3443        expect(*t, source, target);
3444        delete t;
3445    }
3446    for (i = 0; registerRules[i].length()!=0; i+=2) {
3447        Transliterator::unregister(registerRules[i]);
3448    }
3449}
3450
3451char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3452    if (ch <= 0xFFFF) {
3453        sprintf(buffer, "\\u%04x", (int)ch);
3454    } else {
3455        sprintf(buffer, "\\U%08x", (int)ch);
3456    }
3457    return buffer;
3458}
3459
3460void TransliteratorTest::TestSurrogateCasing (void) {
3461    // check that casing handles surrogates
3462    // titlecase is currently defective
3463    char buffer[20];
3464    UChar buffer2[20];
3465    UChar32 dee;
3466    U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3467    UnicodeString DEE(u_totitle(dee));
3468    if (DEE != DESERET_DEE) {
3469        err("Fails titlecase of surrogates");
3470        err(Char32ToEscapedChars(dee, buffer));
3471        err(", ");
3472        errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3473    }
3474
3475    UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3476    UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3477    UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3478    UErrorCode status= U_ZERO_ERROR;
3479
3480    u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3481    if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3482        errln("Fails: Can't uppercase surrogates.");
3483    }
3484
3485    status= U_ZERO_ERROR;
3486    u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3487    if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3488        errln("Fails: Can't lowercase surrogates.");
3489    }
3490}
3491
3492static void _trans(Transliterator& t, const UnicodeString& src,
3493                   UnicodeString& result) {
3494    result = src;
3495    t.transliterate(result);
3496}
3497
3498static void _trans(const UnicodeString& id, const UnicodeString& src,
3499                   UnicodeString& result, UErrorCode ec) {
3500    UParseError pe;
3501    Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3502    if (U_SUCCESS(ec)) {
3503        _trans(*t, src, result);
3504    }
3505    delete t;
3506}
3507
3508static UnicodeString _findMatch(const UnicodeString& source,
3509                                       const UnicodeString* pairs) {
3510    UnicodeString empty;
3511    for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3512        if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3513            return pairs[i+1];
3514        }
3515    }
3516    return empty;
3517}
3518
3519// Check to see that incremental gets at least part way through a reasonable string.
3520
3521void TransliteratorTest::TestIncrementalProgress(void) {
3522    UErrorCode ec = U_ZERO_ERROR;
3523    UnicodeString latinTest = "The Quick Brown Fox.";
3524    UnicodeString devaTest;
3525    _trans("Latin-Devanagari", latinTest, devaTest, ec);
3526    UnicodeString kataTest;
3527    _trans("Latin-Katakana", latinTest, kataTest, ec);
3528    if (U_FAILURE(ec)) {
3529        errln("FAIL: Internal error");
3530        return;
3531    }
3532    const UnicodeString tests[] = {
3533        "Any", latinTest,
3534        "Latin", latinTest,
3535        "Halfwidth", latinTest,
3536        "Devanagari", devaTest,
3537        "Katakana", kataTest,
3538        "" // END MARKER
3539    };
3540
3541    UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3542    int32_t i = 0, j=0, k=0;
3543    int32_t sources = Transliterator::countAvailableSources();
3544    for (i = 0; i < sources; i++) {
3545        UnicodeString source;
3546        Transliterator::getAvailableSource(i, source);
3547        UnicodeString test = _findMatch(source, tests);
3548        if (test.length() == 0) {
3549            logln((UnicodeString)"Skipping " + source + "-X");
3550            continue;
3551        }
3552        int32_t targets = Transliterator::countAvailableTargets(source);
3553        for (j = 0; j < targets; j++) {
3554            UnicodeString target;
3555            Transliterator::getAvailableTarget(j, source, target);
3556            int32_t variants = Transliterator::countAvailableVariants(source, target);
3557            for (k =0; k< variants; k++) {
3558                UnicodeString variant;
3559                UParseError err;
3560                UErrorCode status = U_ZERO_ERROR;
3561
3562                Transliterator::getAvailableVariant(k, source, target, variant);
3563                UnicodeString id = source + "-" + target + "/" + variant;
3564
3565                Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3566                if (U_FAILURE(status)) {
3567                    dataerrln((UnicodeString)"FAIL: Could not create " + id);
3568                    delete t;
3569                    continue;
3570                }
3571                status = U_ZERO_ERROR;
3572                CheckIncrementalAux(t, test);
3573
3574                UnicodeString rev;
3575                _trans(*t, test, rev);
3576                Transliterator *inv = t->createInverse(status);
3577                if (U_FAILURE(status)) {
3578                    // The following are forward-only, it is OK that creating an inverse will not work:
3579                    // 1. Devanagari-Arabic
3580                    // 2. Any-*/BGN
3581                    // 2a. Any-*/BGN_1981
3582                    // 3. Any-*/UNGEGN
3583                    // 4. Any-*/MNS
3584                    // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3585                    if (    id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3586                         && !(id.startsWith((UnicodeString)"Any-") &&
3587                                (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3588                             )
3589#if UCONFIG_NO_BREAK_ITERATION
3590                         && id.compare((UnicodeString)"Latin-Thai/") != 0
3591#endif
3592                       )
3593                    {
3594                        errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3595                    }
3596                    delete t;
3597                    delete inv;
3598                    continue;
3599                }
3600                CheckIncrementalAux(inv, rev);
3601                delete t;
3602                delete inv;
3603            }
3604        }
3605    }
3606}
3607
3608void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3609                                                      const UnicodeString& input) {
3610    UErrorCode ec = U_ZERO_ERROR;
3611    UTransPosition pos;
3612    UnicodeString test = input;
3613
3614    pos.contextStart = 0;
3615    pos.contextLimit = input.length();
3616    pos.start = 0;
3617    pos.limit = input.length();
3618
3619    t->transliterate(test, pos, ec);
3620    if (U_FAILURE(ec)) {
3621        errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3622        return;
3623    }
3624    UBool gotError = FALSE;
3625    (void)gotError;    // Suppress set but not used warning.
3626
3627    // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3628
3629    if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3630        errln((UnicodeString)"No Progress, " +
3631              t->getID() + ": " + formatInput(test, input, pos));
3632        gotError = TRUE;
3633    } else {
3634        logln((UnicodeString)"PASS Progress, " +
3635              t->getID() + ": " + formatInput(test, input, pos));
3636    }
3637    t->finishTransliteration(test, pos);
3638    if (pos.start != pos.limit) {
3639        errln((UnicodeString)"Incomplete, " +
3640              t->getID() + ": " + formatInput(test, input, pos));
3641        gotError = TRUE;
3642    }
3643}
3644
3645void TransliteratorTest::TestFunction() {
3646    // Careful with spacing and ';' here:  Phrase this exactly
3647    // as toRules() is going to return it.  If toRules() changes
3648    // with regard to spacing or ';', then adjust this string.
3649    UnicodeString rule =
3650        "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3651
3652    UParseError pe;
3653    UErrorCode ec = U_ZERO_ERROR;
3654    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3655    if (t == NULL) {
3656        dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3657        return;
3658    }
3659
3660    UnicodeString r;
3661    t->toRules(r, TRUE);
3662    if (r == rule) {
3663        logln((UnicodeString)"OK: toRules() => " + r);
3664    } else {
3665        errln((UnicodeString)"FAIL: toRules() => " + r +
3666              ", expected " + rule);
3667    }
3668
3669    expect(*t, "The Quick Brown Fox",
3670           UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3671
3672    delete t;
3673}
3674
3675void TransliteratorTest::TestInvalidBackRef(void) {
3676    UnicodeString rule =  ". > $1;";
3677    UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3678    UParseError pe;
3679    UErrorCode ec = U_ZERO_ERROR;
3680    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3681    Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3682
3683    if (t != NULL) {
3684        errln("FAIL: createFromRules should have returned NULL");
3685        delete t;
3686    }
3687
3688    if (t2 != NULL) {
3689        errln("FAIL: createFromRules should have returned NULL");
3690        delete t2;
3691    }
3692
3693    if (U_SUCCESS(ec)) {
3694        errln("FAIL: Ok: . > $1; => no error");
3695    } else {
3696        logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3697    }
3698}
3699
3700void TransliteratorTest::TestMulticharStringSet() {
3701    // Basic testing
3702    const char* rule =
3703        "       [{aa}]       > x;"
3704        "         a          > y;"
3705        "       [b{bc}]      > z;"
3706        "[{gd}] { e          > q;"
3707        "         e } [{fg}] > r;" ;
3708
3709    UParseError pe;
3710    UErrorCode ec = U_ZERO_ERROR;
3711    Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3712    if (t == NULL || U_FAILURE(ec)) {
3713        delete t;
3714        errln("FAIL: createFromRules failed");
3715        return;
3716    }
3717
3718    expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3719           "y x yz z d gd de gdq gdqfg ddrfg");
3720    delete t;
3721
3722    // Overlapped string test.  Make sure that when multiple
3723    // strings can match that the longest one is matched.
3724    rule =
3725        "    [a {ab} {abc}]    > x;"
3726        "           b          > y;"
3727        "           c          > z;"
3728        " q [t {st} {rst}] { e > p;" ;
3729
3730    t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3731    if (t == NULL || U_FAILURE(ec)) {
3732        delete t;
3733        errln("FAIL: createFromRules failed");
3734        return;
3735    }
3736
3737    expect(*t, "a ab abc qte qste qrste",
3738           "x x x qtp qstp qrstp");
3739    delete t;
3740}
3741
3742// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3743// BEGIN TestUserFunction support factory
3744
// Prototype transliterators, one per slot; _TUFFactory clones the entry
// selected by the integer token it is given.  Filled in by _TUFReg and
// deleted by _TUFUnreg.
Transliterator* _TUFF[4];
// ID under which each slot was registered; allocated in _TUFReg and used by
// _TUFUnreg to unregister the factory before being deleted.
UnicodeString* _TUFID[4];
3747
3748static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3749                                   Transliterator::Token context) {
3750    return _TUFF[context.integer]->clone();
3751}
3752
3753static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3754    _TUFF[n] = t;
3755    _TUFID[n] = new UnicodeString(ID);
3756    Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3757}
3758
3759static void _TUFUnreg(int32_t n) {
3760    if (_TUFF[n] != NULL) {
3761        Transliterator::unregister(*_TUFID[n]);
3762        delete _TUFF[n];
3763        delete _TUFID[n];
3764    }
3765}
3766
3767// END TestUserFunction support factory
3768// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3769
3770/**
3771 * Test that user-registered transliterators can be used under function
3772 * syntax.
3773 */
3774void TransliteratorTest::TestUserFunction() {
3775
3776    Transliterator* t;
3777    UParseError pe;
3778    UErrorCode ec = U_ZERO_ERROR;
3779
3780    // Setup our factory
3781    int32_t i;
3782    for (i=0; i<4; ++i) {
3783        _TUFF[i] = NULL;
3784    }
3785
3786    // There's no need to register inverses if we don't use them
3787    t = Transliterator::createFromRules("gif",
3788                                        UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3789                                        UTRANS_FORWARD, pe, ec);
3790    if (t == NULL || U_FAILURE(ec)) {
3791        dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3792        return;
3793    }
3794    _TUFReg("Any-gif", t, 0);
3795
3796    t = Transliterator::createFromRules("RemoveCurly",
3797                                        UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3798                                        UTRANS_FORWARD, pe, ec);
3799    if (t == NULL || U_FAILURE(ec)) {
3800        errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3801        goto FAIL;
3802    }
3803    expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3804    _TUFReg("Any-RemoveCurly", t, 1);
3805
3806    logln("Trying &hex");
3807    t = Transliterator::createFromRules("hex2",
3808                                        "(.) > &hex($1);",
3809                                        UTRANS_FORWARD, pe, ec);
3810    if (t == NULL || U_FAILURE(ec)) {
3811        errln("FAIL: createFromRules");
3812        goto FAIL;
3813    }
3814    logln("Registering");
3815    _TUFReg("Any-hex2", t, 2);
3816    t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3817    if (t == NULL || U_FAILURE(ec)) {
3818        errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3819        goto FAIL;
3820    }
3821    expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3822    delete t;
3823
3824    logln("Trying &gif");
3825    t = Transliterator::createFromRules("gif2",
3826                                        "(.) > &Gif(&Hex2($1));",
3827                                        UTRANS_FORWARD, pe, ec);
3828    if (t == NULL || U_FAILURE(ec)) {
3829        errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3830        goto FAIL;
3831    }
3832    logln("Registering");
3833    _TUFReg("Any-gif2", t, 3);
3834    t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3835    if (t == NULL || U_FAILURE(ec)) {
3836        errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3837        goto FAIL;
3838    }
3839    expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3840           "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3841    delete t;
3842
3843    // Test that filters are allowed after &
3844    t = Transliterator::createFromRules("test",
3845                                        "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3846                                        UTRANS_FORWARD, pe, ec);
3847    if (t == NULL || U_FAILURE(ec)) {
3848        errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3849        goto FAIL;
3850    }
3851    expect(*t, "abc",
3852           UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3853    delete t;
3854
3855 FAIL:
3856    for (i=0; i<4; ++i) {
3857        _TUFUnreg(i);
3858    }
3859}
3860
3861/**
3862 * Test the Any-X transliterators.
3863 */
3864void TransliteratorTest::TestAnyX(void) {
3865    UParseError parseError;
3866    UErrorCode status = U_ZERO_ERROR;
3867    Transliterator* anyLatin =
3868        Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3869    if (anyLatin==0) {
3870        dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3871        delete anyLatin;
3872        return;
3873    }
3874
3875    expect(*anyLatin,
3876           CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3877           CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3878
3879    delete anyLatin;
3880}
3881
3882/**
3883 * Test Any-X transliterators with sample letters from all scripts.
3884 */
3885void TransliteratorTest::TestAny(void) {
3886    UErrorCode status = U_ZERO_ERROR;
3887    // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3888    //       function call parameters going on in this test.
3889    UnicodeSet alphabetic("[:alphabetic:]", status);
3890    if (U_FAILURE(status)) {
3891        dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3892        return;
3893    }
3894    alphabetic.freeze();
3895
3896    UnicodeString testString;
3897    for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3898        const char *scriptName = uscript_getShortName((UScriptCode)i);
3899        if (scriptName == NULL) {
3900            errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3901            return;
3902        }
3903
3904        UnicodeSet sample;
3905        sample.applyPropertyAlias("script", scriptName, status);
3906        if (U_FAILURE(status)) {
3907            errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3908            return;
3909        }
3910        sample.retainAll(alphabetic);
3911        for (int32_t count=0; count<5; count++) {
3912            UChar32 c = sample.charAt(count);
3913            if (c == -1) {
3914                break;
3915            }
3916            testString.append(c);
3917        }
3918    }
3919
3920    UParseError parseError;
3921    Transliterator* anyLatin =
3922        Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3923    if (U_FAILURE(status)) {
3924        dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3925        return;
3926    }
3927
3928    logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3929    anyLatin->transliterate(testString);
3930    logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3931    delete anyLatin;
3932}
3933
3934
3935/**
3936 * Test the source and target set API.  These are only implemented
3937 * for RBT and CompoundTransliterator at this time.
3938 */
3939void TransliteratorTest::TestSourceTargetSet() {
3940    UErrorCode ec = U_ZERO_ERROR;
3941
3942    // Rules
3943    const char* r =
3944        "a > b; "
3945        "r [x{lu}] > q;";
3946
3947    // Expected source
3948    UnicodeSet expSrc("[arx{lu}]", ec);
3949
3950    // Expected target
3951    UnicodeSet expTrg("[bq]", ec);
3952
3953    UParseError pe;
3954    Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3955
3956    if (U_FAILURE(ec)) {
3957        delete t;
3958        errln("FAIL: Couldn't set up test");
3959        return;
3960    }
3961
3962    UnicodeSet src; t->getSourceSet(src);
3963    UnicodeSet trg; t->getTargetSet(trg);
3964
3965    if (src == expSrc && trg == expTrg) {
3966        UnicodeString a, b;
3967        logln((UnicodeString)"Ok: " +
3968              r + " => source = " + src.toPattern(a, TRUE) +
3969              ", target = " + trg.toPattern(b, TRUE));
3970    } else {
3971        UnicodeString a, b, c, d;
3972        errln((UnicodeString)"FAIL: " +
3973              r + " => source = " + src.toPattern(a, TRUE) +
3974              ", expected " + expSrc.toPattern(b, TRUE) +
3975              "; target = " + trg.toPattern(c, TRUE) +
3976              ", expected " + expTrg.toPattern(d, TRUE));
3977    }
3978
3979    delete t;
3980}
3981
3982/**
3983 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3984 */
3985void TransliteratorTest::TestPatternWhiteSpace() {
3986    // Rules
3987    const char* r = "a > \\u200E b;";
3988
3989    UErrorCode ec = U_ZERO_ERROR;
3990    UParseError pe;
3991    Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3992
3993    if (U_FAILURE(ec)) {
3994        errln("FAIL: Couldn't set up test");
3995    } else {
3996        expect(*t, "a", "b");
3997    }
3998    delete t;
3999
4000    // UnicodeSet
4001    ec = U_ZERO_ERROR;
4002    UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4003
4004    if (U_FAILURE(ec)) {
4005        errln("FAIL: Couldn't set up test");
4006    } else {
4007        if (set.contains(0x200E)) {
4008            errln("FAIL: U+200E not being ignored by UnicodeSet");
4009        }
4010    }
4011}
4012//======================================================================
4013// this method is in TestUScript.java
4014//======================================================================
4015void TransliteratorTest::TestAllCodepoints(){
4016    UScriptCode code= USCRIPT_INVALID_CODE;
4017    char id[256]={'\0'};
4018    char abbr[256]={'\0'};
4019    char newId[256]={'\0'};
4020    char newAbbrId[256]={'\0'};
4021    char oldId[256]={'\0'};
4022    char oldAbbrId[256]={'\0'};
4023
4024    UErrorCode status =U_ZERO_ERROR;
4025    UParseError pe;
4026
4027    for(uint32_t i = 0; i<=0x10ffff; i++){
4028        code =  uscript_getScript(i,&status);
4029        if(code == USCRIPT_INVALID_CODE){
4030            dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4031        }
4032        const char* myId = uscript_getName(code);
4033        if(!myId) {
4034          dataerrln("Valid script code returned NULL name. Check your data!");
4035          return;
4036        }
4037        uprv_strcpy(id,myId);
4038        uprv_strcpy(abbr,uscript_getShortName(code));
4039
4040        uprv_strcpy(newId,"[:");
4041        uprv_strcat(newId,id);
4042        uprv_strcat(newId,":];NFD");
4043
4044        uprv_strcpy(newAbbrId,"[:");
4045        uprv_strcat(newAbbrId,abbr);
4046        uprv_strcat(newAbbrId,":];NFD");
4047
4048        if(uprv_strcmp(newId,oldId)!=0){
4049            Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4050            if(t==NULL || U_FAILURE(status)){
4051                dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4052            }
4053            delete t;
4054        }
4055        if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4056            Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4057            if(t==NULL || U_FAILURE(status)){
4058                dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4059            }
4060            delete t;
4061        }
4062        uprv_strcpy(oldId,newId);
4063        uprv_strcpy(oldAbbrId, newAbbrId);
4064
4065    }
4066
4067}
4068
/*
 * TEST_TRANSLIT_ID(id, cls)
 *
 * Creates a forward transliterator for `id` and checks that the dynamic
 * class ID of the returned object equals cls::getStaticClassID().
 *
 *   id   transliterator ID; it is also passed to a "%s" format, so it must
 *        be a C string
 *   cls  class whose getStaticClassID() is the expected value
 *
 * A creation failure is reported with dataerrln(), a class ID mismatch
 * with errln().  Both are called unqualified, so the macro has to be
 * expanded where those names are in scope.  The expansion is a braced
 * block with its own `ec` and `t`, and `t` is always deleted.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_FAILURE(ec)) { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4082
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 * Builds a forward transliterator (ID "_") from the rule text `rule` and
 * checks that its dynamic class ID equals cls::getStaticClassID().
 *   - creation failure  -> reported through errln(), including the error name
 *   - class ID mismatch -> reported through errln()
 * `rule` is passed to a "%s" format, so it must be usable as a C string; it
 * no longer has to be a string literal.
 * The instance is deleted before the macro's block ends.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    /* Same message layout as TEST_TRANSLIT_ID: what failed, then why. */ \
    errln("FAIL: Couldn't create %s - %s", rule, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4097
/**
 * Class ID ("boilerplate") check: for each ID or rule below, create the
 * transliterator and verify that its dynamic class ID matches the static
 * class ID of the class named in the second macro argument.
 * See TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE for how failures are reported.
 */
void TransliteratorTest::TestBoilerplate() {
    // Transliterators obtained by ID
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Transliterator built from a rule string
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4113
4114void TransliteratorTest::TestAlternateSyntax() {
4115    // U+2206 == &
4116    // U+2190 == <
4117    // U+2192 == >
4118    // U+2194 == <>
4119    expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4120           "abc",
4121           "xbz");
4122    expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4123           CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4124           UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4125}
4126
// Rule sets shared by TestBeginEnd() and TestBeginEndToRules().
// Entries are referenced by index from BEGIN_END_TEST_CASES, and entry [17]
// is additionally instantiated in the reverse direction by both tests, so
// the position of every entry must stay fixed. Rule sets that use the
// ::BEGIN/::END syntax are kept inside comments and replaced by an empty
// string placeholder at the same index.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    // (the reversible rule set: also instantiated with UTRANS_REVERSE by the tests)
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4300
4301/*
4302(This entire test is commented out below and will need some heavy revision when we re-add
4303the ::BEGIN/::END stuff)
4304static const char* BOGUS_BEGIN_END_RULES[] = {
4305    // [7]
4306    "::BEGIN;"
4307    "abc > xy;"
4308    "::BEGIN;"
4309    "aba > z;"
4310    "::END;"
4311    "::END;",
4312
4313    // [8]
4314    "abc > xy;"
4315    " aba > z;"
4316    "::END;",
4317
4318    // [9]
4319    "::BEGIN;"
4320    "::Upper;"
4321    "::END;"
4322};
4323static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4324*/
4325
// Test data for TestBeginEnd() and TestBeginEndToRules(): a flat array of
// triples (rule set, input text, expected output). Both tests walk it three
// entries at a time, so every row must contribute exactly three strings.
// Rows that refer to the placeholder (empty) entries of BEGIN_END_RULES are
// commented out.
static const char* BEGIN_END_TEST_CASES[] = {
    // rules             input                   expected output
    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",

    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",

//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Number of strings (not rows) in BEGIN_END_TEST_CASES.
static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4354
4355void TransliteratorTest::TestBeginEnd() {
4356    // run through the list of test cases above
4357    int32_t i = 0;
4358    for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4359        expect((UnicodeString)"Test case #" + (i / 3),
4360               UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4361               UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4362               UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4363    }
4364
4365    // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4366    UParseError parseError;
4367    UErrorCode status = U_ZERO_ERROR;
4368    Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4369            UTRANS_REVERSE, parseError, status);
4370    if (reversed == 0 || U_FAILURE(status)) {
4371        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4372    } else {
4373        expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4374    }
4375    delete reversed;
4376
4377    // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4378    // that all of them cause errors
4379/*
4380(commented out until we have the real ::BEGIN/::END stuff in place
4381    for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4382        UParseError parseError;
4383        UErrorCode status = U_ZERO_ERROR;
4384        Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4385                UTRANS_FORWARD, parseError, status);
4386        if (!U_FAILURE(status)) {
4387            delete t;
4388            errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4389        }
4390    }
4391*/
4392}
4393
4394void TransliteratorTest::TestBeginEndToRules() {
4395    // run through the same list of test cases we used above, but this time, instead of just
4396    // instantiating a Transliterator from the rules and running the test against it, we instantiate
4397    // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4398    // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4399    // to (i.e., does the same thing as) the original rule set
4400    for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4401        UParseError parseError;
4402        UErrorCode status = U_ZERO_ERROR;
4403        Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4404                UTRANS_FORWARD, parseError, status);
4405        if (U_FAILURE(status)) {
4406            reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4407        } else {
4408            UnicodeString rules;
4409            t->toRules(rules, TRUE);
4410            Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4411                    UTRANS_FORWARD, parseError, status);
4412            if (U_FAILURE(status)) {
4413                reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4414                        parseError, status);
4415                delete t;
4416            } else {
4417                expect(*t2,
4418                       UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4419                       UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4420                delete t;
4421                delete t2;
4422            }
4423        }
4424    }
4425
4426    // do the same thing for the reversible test case
4427    UParseError parseError;
4428    UErrorCode status = U_ZERO_ERROR;
4429    Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4430            UTRANS_REVERSE, parseError, status);
4431    if (U_FAILURE(status)) {
4432        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4433    } else {
4434        UnicodeString rules;
4435        reversed->toRules(rules, FALSE);
4436        Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4437                parseError, status);
4438        if (U_FAILURE(status)) {
4439            reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4440                    parseError, status);
4441            delete reversed;
4442        } else {
4443            expect(*reversed2,
4444                   UnicodeString("xy XY XYZ yz YZ"),
4445                   UnicodeString("xy abc xaba yz aba"));
4446            delete reversed;
4447            delete reversed2;
4448        }
4449    }
4450}
4451
4452void TransliteratorTest::TestRegisterAlias() {
4453    UnicodeString longID("Lower;[aeiou]Upper");
4454    UnicodeString shortID("Any-CapVowels");
4455    UnicodeString reallyShortID("CapVowels");
4456
4457    Transliterator::registerAlias(shortID, longID);
4458
4459    UErrorCode err = U_ZERO_ERROR;
4460    Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4461    if (U_FAILURE(err)) {
4462        errln("Failed to instantiate transliterator with long ID");
4463        Transliterator::unregister(shortID);
4464        return;
4465    }
4466    Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4467    if (U_FAILURE(err)) {
4468        errln("Failed to instantiate transliterator with short ID");
4469        delete t1;
4470        Transliterator::unregister(shortID);
4471        return;
4472    }
4473
4474    if (t1->getID() != longID)
4475        errln("Transliterator instantiated with long ID doesn't have long ID");
4476    if (t2->getID() != reallyShortID)
4477        errln("Transliterator instantiated with short ID doesn't have short ID");
4478
4479    UnicodeString rules1;
4480    UnicodeString rules2;
4481
4482    t1->toRules(rules1, TRUE);
4483    t2->toRules(rules2, TRUE);
4484    if (rules1 != rules2)
4485        errln("Alias transliterators aren't the same");
4486
4487    delete t1;
4488    delete t2;
4489    Transliterator::unregister(shortID);
4490
4491    t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4492    if (U_SUCCESS(err)) {
4493        errln("Instantiation with short ID succeeded after short ID was unregistered");
4494        delete t1;
4495    }
4496
4497    // try the same thing again, but this time with something other than
4498    // an instance of CompoundTransliterator
4499    UnicodeString realID("Latin-Greek");
4500    UnicodeString fakeID("Latin-dlgkjdflkjdl");
4501    Transliterator::registerAlias(fakeID, realID);
4502
4503    err = U_ZERO_ERROR;
4504    t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4505    if (U_FAILURE(err)) {
4506        dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4507        Transliterator::unregister(realID);
4508        return;
4509    }
4510    t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4511    if (U_FAILURE(err)) {
4512        errln("Failed to instantiate transliterator with fake ID");
4513        delete t1;
4514        Transliterator::unregister(realID);
4515        return;
4516    }
4517
4518    t1->toRules(rules1, TRUE);
4519    t2->toRules(rules2, TRUE);
4520    if (rules1 != rules2)
4521        errln("Alias transliterators aren't the same");
4522
4523    delete t1;
4524    delete t2;
4525    Transliterator::unregister(fakeID);
4526}
4527
4528void TransliteratorTest::TestRuleStripping() {
4529    /*
4530#
4531\uE001>\u0C01; # SIGN
4532    */
4533    static const UChar rule[] = {
4534        0x0023,0x0020,0x000D,0x000A,
4535        0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4536    };
4537    static const UChar expectedRule[] = {
4538        0xE001,0x003E,0x0C01,0x003B,0
4539    };
4540    UChar result[UPRV_LENGTHOF(rule)];
4541    UErrorCode status = U_ZERO_ERROR;
4542    int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4543    if (len != u_strlen(expectedRule)) {
4544        errln("utrans_stripRules return len = %d", len);
4545    }
4546    if (u_strncmp(expectedRule, result, len) != 0) {
4547        errln("utrans_stripRules did not return expected string");
4548    }
4549}
4550
4551/**
4552 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4553 */
4554void TransliteratorTest::TestHalfwidthFullwidth(void) {
4555    UParseError parseError;
4556    UErrorCode status = U_ZERO_ERROR;
4557    Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4558    Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4559    if (hf == 0 || fh == 0) {
4560        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4561        delete hf;
4562        delete fh;
4563        return;
4564    }
4565
4566    // Array of 2n items
4567    // Each item is
4568    //   "hf"|"fh"|"both",
4569    //   <Halfwidth>,
4570    //   <Fullwidth>
4571    const char* DATA[] = {
4572        "both",
4573        "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4574        "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4575    };
4576    int32_t DATA_length = UPRV_LENGTHOF(DATA);
4577
4578    for (int32_t i=0; i<DATA_length; i+=3) {
4579        UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4580        UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4581        switch (*DATA[i]) {
4582        case 0x68: //'h': // Halfwidth-Fullwidth only
4583            expect(*hf, h, f);
4584            break;
4585        case 0x66: //'f': // Fullwidth-Halfwidth only
4586            expect(*fh, f, h);
4587            break;
4588        case 0x62: //'b': // both directions
4589            expect(*hf, h, f);
4590            expect(*fh, f, h);
4591            break;
4592        }
4593    }
4594    delete hf;
4595    delete fh;
4596}
4597
4598
4599    /**
4600     *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4601     *              TODO: confirm that the expected results are correct.
4602     *              For now, test just confirms that C++ and Java give identical results.
4603     */
4604void TransliteratorTest::TestThai(void) {
4605#if !UCONFIG_NO_BREAK_ITERATION
4606    UParseError parseError;
4607    UErrorCode status = U_ZERO_ERROR;
4608    Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4609    if (tr == 0) {
4610        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4611        return;
4612    }
4613    if (U_FAILURE(status)) {
4614        errln("FAIL: createInstance failed with %s", u_errorName(status));
4615        return;
4616    }
4617    const char *thaiText =
4618        "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4619        "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4620        "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4621        "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4622        "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4623        "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4624        "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4625        "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4626        "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4627        "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4628        "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4629        "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4630        "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4631        "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4632        "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4633        "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4634        "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4635        "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4636        "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4637        "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4638        "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4639        "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4640        "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4641        "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4642        " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4643        "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4644        "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4645        " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4646        "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4647        "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4648
4649    const char *latinText =
4650        "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4651        "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4652        "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4653        "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4654        "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4655        " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4656        "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4657        "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4658        "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4659        "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4660        "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4661        "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4662        " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4663        "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4664        " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4665        "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4666        "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4667        "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4668
4669
4670    UnicodeString  xlitText(thaiText);
4671    xlitText = xlitText.unescape();
4672    tr->transliterate(xlitText);
4673
4674    UnicodeString expectedText(latinText);
4675    expectedText = expectedText.unescape();
4676    expect(*tr, xlitText, expectedText);
4677
4678    delete tr;
4679#endif
4680}
4681
4682
4683//======================================================================
4684// Support methods
4685//======================================================================
4686void TransliteratorTest::expectT(const UnicodeString& id,
4687                                 const UnicodeString& source,
4688                                 const UnicodeString& expectedResult) {
4689    UErrorCode ec = U_ZERO_ERROR;
4690    UParseError pe;
4691    Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4692    if (U_FAILURE(ec)) {
4693        errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4694        delete t;
4695        return;
4696    }
4697    expect(*t, source, expectedResult);
4698    delete t;
4699}
4700
4701void TransliteratorTest::reportParseError(const UnicodeString& message,
4702                                          const UParseError& parseError,
4703                                          const UErrorCode& status) {
4704    dataerrln(message +
4705          /*", parse error " + parseError.code +*/
4706          ", line " + parseError.line +
4707          ", offset " + parseError.offset +
4708          ", pre-context " + prettify(parseError.preContext, TRUE) +
4709          ", post-context " + prettify(parseError.postContext,TRUE) +
4710          ", Error: " + u_errorName(status));
4711}
4712
4713void TransliteratorTest::expect(const UnicodeString& rules,
4714                                const UnicodeString& source,
4715                                const UnicodeString& expectedResult,
4716                                UTransPosition *pos) {
4717    expect("<ID>", rules, source, expectedResult, pos);
4718}
4719
4720void TransliteratorTest::expect(const UnicodeString& id,
4721                                const UnicodeString& rules,
4722                                const UnicodeString& source,
4723                                const UnicodeString& expectedResult,
4724                                UTransPosition *pos) {
4725    UErrorCode status = U_ZERO_ERROR;
4726    UParseError parseError;
4727    Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4728    if (U_FAILURE(status)) {
4729        reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4730    } else {
4731        expect(*t, source, expectedResult, pos);
4732    }
4733    delete t;
4734}
4735
/**
 * Checks a transliterator together with its reverse.
 * @param t                     transliterator checked in the source -> expectedResult direction
 * @param source                input for t, and expected output of reverseTransliterator
 * @param expectedResult        expected output of t, and input for reverseTransliterator
 * @param reverseTransliterator transliterator checked in the expectedResult -> source direction
 */
void TransliteratorTest::expect(const Transliterator& t,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                const Transliterator& reverseTransliterator) {
    // Forward direction first, then the round trip back with the roles of
    // the two strings swapped.
    expect(t, source, expectedResult);
    expect(reverseTransliterator, expectedResult, source);
}
4743
4744void TransliteratorTest::expect(const Transliterator& t,
4745                                const UnicodeString& source,
4746                                const UnicodeString& expectedResult,
4747                                UTransPosition *pos) {
4748    if (pos == 0) {
4749        UnicodeString result(source);
4750        t.transliterate(result);
4751        expectAux(t.getID() + ":String", source, result, expectedResult);
4752    }
4753    UTransPosition index={0, 0, 0, 0};
4754    if (pos != 0) {
4755        index = *pos;
4756    }
4757
4758    UnicodeString rsource(source);
4759    if (pos == 0) {
4760        t.transliterate(rsource);
4761    } else {
4762        // Do it all at once -- below we do it incrementally
4763        t.finishTransliteration(rsource, *pos);
4764    }
4765    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4766
4767    // Test keyboard (incremental) transliteration -- this result
4768    // must be the same after we finalize (see below).
4769    UnicodeString log;
4770    rsource.remove();
4771    if (pos != 0) {
4772        rsource = source;
4773        formatInput(log, rsource, index);
4774        log.append(" -> ");
4775        UErrorCode status = U_ZERO_ERROR;
4776        t.transliterate(rsource, index, status);
4777        formatInput(log, rsource, index);
4778    } else {
4779        for (int32_t i=0; i<source.length(); ++i) {
4780            if (i != 0) {
4781                log.append(" + ");
4782            }
4783            log.append(source.charAt(i)).append(" -> ");
4784            UErrorCode status = U_ZERO_ERROR;
4785            t.transliterate(rsource, index, source.charAt(i), status);
4786            formatInput(log, rsource, index);
4787        }
4788    }
4789
4790    // As a final step in keyboard transliteration, we must call
4791    // transliterate to finish off any pending partial matches that
4792    // were waiting for more input.
4793    t.finishTransliteration(rsource, index);
4794    log.append(" => ").append(rsource);
4795
4796    expectAux(t.getID() + ":Keyboard", log,
4797              rsource == expectedResult,
4798              expectedResult);
4799}
4800
4801
4802/**
4803 * @param appendTo result is appended to this param.
4804 * @param input the string being transliterated
4805 * @param pos the index struct
4806 */
4807UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4808                                               const UnicodeString& input,
4809                                               const UTransPosition& pos) {
4810    // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4811    // the {} indicate the context start and limit, and the ||
4812    // indicate the start and limit.
4813    if (0 <= pos.contextStart &&
4814        pos.contextStart <= pos.start &&
4815        pos.start <= pos.limit &&
4816        pos.limit <= pos.contextLimit &&
4817        pos.contextLimit <= input.length()) {
4818
4819        UnicodeString a, b, c, d, e;
4820        input.extractBetween(0, pos.contextStart, a);
4821        input.extractBetween(pos.contextStart, pos.start, b);
4822        input.extractBetween(pos.start, pos.limit, c);
4823        input.extractBetween(pos.limit, pos.contextLimit, d);
4824        input.extractBetween(pos.contextLimit, input.length(), e);
4825        appendTo.append(a).append((UChar)123/*{*/).append(b).
4826            append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4827            append((UChar)125/*}*/).append(e);
4828    } else {
4829        appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4830                        pos.contextStart + ", s=" + pos.start + ", l=" +
4831                        pos.limit + ", cl=" + pos.contextLimit + "} on " +
4832                        input);
4833    }
4834    return appendTo;
4835}
4836
4837void TransliteratorTest::expectAux(const UnicodeString& tag,
4838                                   const UnicodeString& source,
4839                                   const UnicodeString& result,
4840                                   const UnicodeString& expectedResult) {
4841    expectAux(tag, source + " -> " + result,
4842              result == expectedResult,
4843              expectedResult);
4844}
4845
4846void TransliteratorTest::expectAux(const UnicodeString& tag,
4847                                   const UnicodeString& summary, UBool pass,
4848                                   const UnicodeString& expectedResult) {
4849    if (pass) {
4850        logln(UnicodeString("(")+tag+") " + prettify(summary));
4851    } else {
4852        dataerrln(UnicodeString("FAIL: (")+tag+") "
4853              + prettify(summary)
4854              + ", expected " + prettify(expectedResult));
4855    }
4856}
4857
4858#endif /* #if !UCONFIG_NO_TRANSLITERATION */
4859