1/*
2**********************************************************************
3*   Copyright (C) 1999-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   11/10/99    aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "transtst.h"
16#include "unicode/locid.h"
17#include "unicode/dtfmtsym.h"
18#include "unicode/normlzr.h"
19#include "unicode/translit.h"
20#include "unicode/uchar.h"
21#include "unicode/unifilt.h"
22#include "unicode/uniset.h"
23#include "unicode/ustring.h"
24#include "unicode/usetiter.h"
25#include "unicode/uscript.h"
26#include "unicode/utf16.h"
27#include "cpdtrans.h"
28#include "nultrans.h"
29#include "rbt.h"
30#include "rbt_pars.h"
31#include "anytrans.h"
32#include "esctrn.h"
33#include "name2uni.h"
34#include "nortrans.h"
35#include "remtrans.h"
36#include "titletrn.h"
37#include "tolowtrn.h"
38#include "toupptrn.h"
39#include "unesctrn.h"
40#include "uni2name.h"
41#include "cstring.h"
42#include "cmemory.h"
43#include <stdio.h>
44
45/***********************************************************************
46
47                     HOW TO USE THIS TEST FILE
48                               -or-
49                  How I developed on two platforms
50                without losing (too much of) my mind
51
52
531. Add new tests by copying/pasting/changing existing tests.  On Java,
54   any public void method named Test...() taking no parameters becomes
55   a test.  On C++, you need to modify the header and add a line to
56   the runIndexedTest() dispatch method.
57
582. Make liberal use of the expect() method; it is your friend.
59
603. The tests in this file exactly match those in a sister file on the
61   other side.  The two files are:
62
63   icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
64   icu4c:  source/test/intltest/transtst.cpp
65
66                  ==> THIS IS THE IMPORTANT PART <==
67
68   When you add a test in this file, add it in TransliteratorTest.java
69   too.  Give it the same name and put it in the same relative place.
70   This makes maintenance a lot simpler for any poor soul who ends up
71   trying to synchronize the tests between icu4j and icu4c.
72
734. If you MUST enter a test that is NOT paralleled in the sister file,
74   then add it in the special non-mirrored section.  These are
75   labeled
76
77     "icu4j ONLY"
78
79   or
80
81     "icu4c ONLY"
82
83   Make sure you document the reason the test is here and not there.
84
85
86Thank you.
87The Management
88***********************************************************************/
89
90// Define character constants thusly to be EBCDIC-friendly
91enum {
92    LEFT_BRACE=((UChar)0x007B), /*{*/
93    PIPE      =((UChar)0x007C), /*|*/
94    ZERO      =((UChar)0x0030), /*0*/
95    UPPER_A   =((UChar)0x0041)  /*A*/
96};
97
98TransliteratorTest::TransliteratorTest()
99:   DESERET_DEE((UChar32)0x10414),
100    DESERET_dee((UChar32)0x1043C)
101{
102}
103
104TransliteratorTest::~TransliteratorTest() {}
105
106void
107TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
108                                   const char* &name, char* /*par*/) {
109    switch (index) {
110        TESTCASE(0,TestInstantiation);
111        TESTCASE(1,TestSimpleRules);
112        TESTCASE(2,TestRuleBasedInverse);
113        TESTCASE(3,TestKeyboard);
114        TESTCASE(4,TestKeyboard2);
115        TESTCASE(5,TestKeyboard3);
116        TESTCASE(6,TestArabic);
117        TESTCASE(7,TestCompoundKana);
118        TESTCASE(8,TestCompoundHex);
119        TESTCASE(9,TestFiltering);
120        TESTCASE(10,TestInlineSet);
121        TESTCASE(11,TestPatternQuoting);
122        TESTCASE(12,TestJ277);
123        TESTCASE(13,TestJ243);
124        TESTCASE(14,TestJ329);
125        TESTCASE(15,TestSegments);
126        TESTCASE(16,TestCursorOffset);
127        TESTCASE(17,TestArbitraryVariableValues);
128        TESTCASE(18,TestPositionHandling);
129        TESTCASE(19,TestHiraganaKatakana);
130        TESTCASE(20,TestCopyJ476);
131        TESTCASE(21,TestAnchors);
132        TESTCASE(22,TestInterIndic);
133        TESTCASE(23,TestFilterIDs);
134        TESTCASE(24,TestCaseMap);
135        TESTCASE(25,TestNameMap);
136        TESTCASE(26,TestLiberalizedID);
137        TESTCASE(27,TestCreateInstance);
138        TESTCASE(28,TestNormalizationTransliterator);
139        TESTCASE(29,TestCompoundRBT);
140        TESTCASE(30,TestCompoundFilter);
141        TESTCASE(31,TestRemove);
142        TESTCASE(32,TestToRules);
143        TESTCASE(33,TestContext);
144        TESTCASE(34,TestSupplemental);
145        TESTCASE(35,TestQuantifier);
146        TESTCASE(36,TestSTV);
147        TESTCASE(37,TestCompoundInverse);
148        TESTCASE(38,TestNFDChainRBT);
149        TESTCASE(39,TestNullInverse);
150        TESTCASE(40,TestAliasInverseID);
151        TESTCASE(41,TestCompoundInverseID);
152        TESTCASE(42,TestUndefinedVariable);
153        TESTCASE(43,TestEmptyContext);
154        TESTCASE(44,TestCompoundFilterID);
155        TESTCASE(45,TestPropertySet);
156        TESTCASE(46,TestNewEngine);
157        TESTCASE(47,TestQuantifiedSegment);
158        TESTCASE(48,TestDevanagariLatinRT);
159        TESTCASE(49,TestTeluguLatinRT);
160        TESTCASE(50,TestCompoundLatinRT);
161        TESTCASE(51,TestSanskritLatinRT);
162        TESTCASE(52,TestLocaleInstantiation);
163        TESTCASE(53,TestTitleAccents);
164        TESTCASE(54,TestLocaleResource);
165        TESTCASE(55,TestParseError);
166        TESTCASE(56,TestOutputSet);
167        TESTCASE(57,TestVariableRange);
168        TESTCASE(58,TestInvalidPostContext);
169        TESTCASE(59,TestIDForms);
170        TESTCASE(60,TestToRulesMark);
171        TESTCASE(61,TestEscape);
172        TESTCASE(62,TestAnchorMasking);
173        TESTCASE(63,TestDisplayName);
174        TESTCASE(64,TestSpecialCases);
175#if !UCONFIG_NO_FILE_IO
176        TESTCASE(65,TestIncrementalProgress);
177#endif
178        TESTCASE(66,TestSurrogateCasing);
179        TESTCASE(67,TestFunction);
180        TESTCASE(68,TestInvalidBackRef);
181        TESTCASE(69,TestMulticharStringSet);
182        TESTCASE(70,TestUserFunction);
183        TESTCASE(71,TestAnyX);
184        TESTCASE(72,TestSourceTargetSet);
185        TESTCASE(73,TestGurmukhiDevanagari);
186        TESTCASE(74,TestPatternWhiteSpace);
187        TESTCASE(75,TestAllCodepoints);
188        TESTCASE(76,TestBoilerplate);
189        TESTCASE(77,TestAlternateSyntax);
190        TESTCASE(78,TestBeginEnd);
191        TESTCASE(79,TestBeginEndToRules);
192        TESTCASE(80,TestRegisterAlias);
193        TESTCASE(81,TestRuleStripping);
194        TESTCASE(82,TestHalfwidthFullwidth);
195        TESTCASE(83,TestThai);
196        TESTCASE(84,TestAny);
197        default: name = ""; break;
198    }
199}
200
201static const UVersionInfo ICU_39 = {3,9,4,0};
202/**
203 * Make sure every system transliterator can be instantiated.
204 *
205 * ALSO test that the result of toRules() for each rule is a valid
206 * rule.  Do this here so we don't have to have another test that
207 * instantiates everything as well.
208 */
209void TransliteratorTest::TestInstantiation() {
210    UErrorCode ec = U_ZERO_ERROR;
211    StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
212    assertSuccess("getAvailableIDs()", ec);
213    assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
214    int32_t n = Transliterator::countAvailableIDs();
215    assertTrue("getAvailableIDs().count()==countAvailableIDs()",
216               avail->count(ec) == n);
217    assertSuccess("count()", ec);
218    UnicodeString name;
219    for (int32_t i=0; i<n; ++i) {
220        const UnicodeString& id = *avail->snext(ec);
221        if (!assertSuccess("snext()", ec) ||
222            !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
223            break;
224        }
225        UnicodeString id2 = Transliterator::getAvailableID(i);
226        if (id.length() < 1) {
227            errln(UnicodeString("FAIL: getAvailableID(") +
228                  i + ") returned empty string");
229            continue;
230        }
231        if (id != id2) {
232            errln(UnicodeString("FAIL: getAvailableID(") +
233                  i + ") != getAvailableIDs().snext()");
234            continue;
235        }
236        UParseError parseError;
237        UErrorCode status = U_ZERO_ERROR;
238        Transliterator* t = Transliterator::createInstance(id,
239                              UTRANS_FORWARD, parseError,status);
240        name.truncate(0);
241        Transliterator::getDisplayName(id, name);
242        if (t == 0) {
243#if UCONFIG_NO_BREAK_ITERATION
244            // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
245            if (id.compare((UnicodeString)"Thai-Latin") != 0)
246#endif
247                dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
248                      /*", parse error " + parseError.code +*/
249                      ", line " + parseError.line +
250                      ", offset " + parseError.offset +
251                      ", pre-context " + prettify(parseError.preContext, TRUE) +
252                      ", post-context " +prettify(parseError.postContext,TRUE) +
253                      ", Error: " + u_errorName(status));
254                // When createInstance fails, it deletes the failing
255                // entry from the available ID list.  We detect this
256                // here by looking for a change in countAvailableIDs.
257            int32_t nn = Transliterator::countAvailableIDs();
258            if (nn == (n - 1)) {
259                n = nn;
260                --i; // Compensate for deleted entry
261            }
262        } else {
263            logln(UnicodeString("OK: ") + name + " (" + id + ")");
264
265            // Now test toRules
266            UnicodeString rules;
267            t->toRules(rules, TRUE);
268            Transliterator *u = Transliterator::createFromRules("x",
269                                    rules, UTRANS_FORWARD, parseError,status);
270            if (u == 0) {
271                errln(UnicodeString("FAIL: ") + id +
272                      ".createFromRules() => bad rules" +
273                      /*", parse error " + parseError.code +*/
274                      ", line " + parseError.line +
275                      ", offset " + parseError.offset +
276                      ", context " + prettify(parseError.preContext, TRUE) +
277                      ", rules: " + prettify(rules, TRUE));
278            } else {
279                delete u;
280            }
281            delete t;
282        }
283    }
284    assertTrue("snext()==NULL", avail->snext(ec)==NULL);
285    assertSuccess("snext()", ec);
286    delete avail;
287
288    // Now test the failure path
289    UParseError parseError;
290    UErrorCode status = U_ZERO_ERROR;
291    UnicodeString id("<Not a valid Transliterator ID>");
292    Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
293    if (t != 0) {
294        errln("FAIL: " + id + " returned a transliterator");
295        delete t;
296    } else {
297        logln("OK: Bogus ID handled properly");
298    }
299}
300
301void TransliteratorTest::TestSimpleRules(void) {
302    /* Example: rules 1. ab>x|y
303     *                2. yc>z
304     *
305     * []|eabcd  start - no match, copy e to tranlated buffer
306     * [e]|abcd  match rule 1 - copy output & adjust cursor
307     * [ex|y]cd  match rule 2 - copy output & adjust cursor
308     * [exz]|d   no match, copy d to transliterated buffer
309     * [exzd]|   done
310     */
311    expect(UnicodeString("ab>x|y;", "") +
312           "yc>z",
313           "eabcd", "exzd");
314
315    /* Another set of rules:
316     *    1. ab>x|yzacw
317     *    2. za>q
318     *    3. qc>r
319     *    4. cw>n
320     *
321     * []|ab       Rule 1
322     * [x|yzacw]   No match
323     * [xy|zacw]   Rule 2
324     * [xyq|cw]    Rule 4
325     * [xyqn]|     Done
326     */
327    expect(UnicodeString("ab>x|yzacw;") +
328           "za>q;" +
329           "qc>r;" +
330           "cw>n",
331           "ab", "xyqn");
332
333    /* Test categories
334     */
335    UErrorCode status = U_ZERO_ERROR;
336    UParseError parseError;
337    Transliterator *t = Transliterator::createFromRules(
338        "<ID>",
339        UnicodeString("$dummy=").append((UChar)0xE100) +
340        UnicodeString(";"
341                      "$vowel=[aeiouAEIOU];"
342                      "$lu=[:Lu:];"
343                      "$vowel } $lu > '!';"
344                      "$vowel > '&';"
345                      "'!' { $lu > '^';"
346                      "$lu > '*';"
347                      "a > ERROR", ""),
348        UTRANS_FORWARD, parseError,
349        status);
350    if (U_FAILURE(status)) {
351        dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
352        return;
353    }
354    expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
355    delete t;
356}
357
358/**
359 * Test inline set syntax and set variable syntax.
360 */
361void TransliteratorTest::TestInlineSet(void) {
362    expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
363    expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
364
365    expect(UnicodeString(
366           "$digit = [0-9];"
367           "$alpha = [a-zA-Z];"
368           "$alphanumeric = [$digit $alpha];" // ***
369           "$special = [^$alphanumeric];"     // ***
370           "$alphanumeric > '-';"
371           "$special > '*';", ""),
372
373           "thx-1138", "---*----");
374}
375
376/**
377 * Create some inverses and confirm that they work.  We have to be
378 * careful how we do this, since the inverses will not be true
379 * inverses -- we can't throw any random string at the composition
380 * of the transliterators and expect the identity function.  F x
381 * F' != I.  However, if we are careful about the input, we will
382 * get the expected results.
383 */
384void TransliteratorTest::TestRuleBasedInverse(void) {
385    UnicodeString RULES =
386        UnicodeString("abc>zyx;") +
387        "ab>yz;" +
388        "bc>zx;" +
389        "ca>xy;" +
390        "a>x;" +
391        "b>y;" +
392        "c>z;" +
393
394        "abc<zyx;" +
395        "ab<yz;" +
396        "bc<zx;" +
397        "ca<xy;" +
398        "a<x;" +
399        "b<y;" +
400        "c<z;" +
401
402        "";
403
404    const char* DATA[] = {
405        // Careful here -- random strings will not work.  If we keep
406        // the left side to the domain and the right side to the range
407        // we will be okay though (left, abc; right xyz).
408        "a", "x",
409        "abcacab", "zyxxxyy",
410        "caccb", "xyzzy",
411    };
412
413    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
414
415    UErrorCode status = U_ZERO_ERROR;
416    UParseError parseError;
417    Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
418                                UTRANS_FORWARD, parseError, status);
419    Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
420                                UTRANS_REVERSE, parseError, status);
421    if (U_FAILURE(status)) {
422        errln("FAIL: RBT constructor failed");
423        return;
424    }
425    for (int32_t i=0; i<DATA_length; i+=2) {
426        expect(*fwd, DATA[i], DATA[i+1]);
427        expect(*rev, DATA[i+1], DATA[i]);
428    }
429    delete fwd;
430    delete rev;
431}
432
433/**
434 * Basic test of keyboard.
435 */
436void TransliteratorTest::TestKeyboard(void) {
437    UParseError parseError;
438    UErrorCode status = U_ZERO_ERROR;
439    Transliterator *t = Transliterator::createFromRules("<ID>",
440                              UnicodeString("psch>Y;")
441                              +"ps>y;"
442                              +"ch>x;"
443                              +"a>A;",
444                              UTRANS_FORWARD, parseError,
445                              status);
446    if (U_FAILURE(status)) {
447        errln("FAIL: RBT constructor failed");
448        return;
449    }
450    const char* DATA[] = {
451        // insertion, buffer
452        "a", "A",
453        "p", "Ap",
454        "s", "Aps",
455        "c", "Apsc",
456        "a", "AycA",
457        "psch", "AycAY",
458        0, "AycAY", // null means finishKeyboardTransliteration
459    };
460
461    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
462    delete t;
463}
464
465/**
466 * Basic test of keyboard with cursor.
467 */
468void TransliteratorTest::TestKeyboard2(void) {
469    UParseError parseError;
470    UErrorCode status = U_ZERO_ERROR;
471    Transliterator *t = Transliterator::createFromRules("<ID>",
472                              UnicodeString("ych>Y;")
473                              +"ps>|y;"
474                              +"ch>x;"
475                              +"a>A;",
476                              UTRANS_FORWARD, parseError,
477                              status);
478    if (U_FAILURE(status)) {
479        errln("FAIL: RBT constructor failed");
480        return;
481    }
482    const char* DATA[] = {
483        // insertion, buffer
484        "a", "A",
485        "p", "Ap",
486        "s", "Aps", // modified for rollback - "Ay",
487        "c", "Apsc", // modified for rollback - "Ayc",
488        "a", "AycA",
489        "p", "AycAp",
490        "s", "AycAps", // modified for rollback - "AycAy",
491        "c", "AycApsc", // modified for rollback - "AycAyc",
492        "h", "AycAY",
493        0, "AycAY", // null means finishKeyboardTransliteration
494    };
495
496    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
497    delete t;
498}
499
500/**
501 * Test keyboard transliteration with back-replacement.
502 */
503void TransliteratorTest::TestKeyboard3(void) {
504    // We want th>z but t>y.  Furthermore, during keyboard
505    // transliteration we want t>y then yh>z if t, then h are
506    // typed.
507    UnicodeString RULES("t>|y;"
508                        "yh>z;");
509
510    const char* DATA[] = {
511        // Column 1: characters to add to buffer (as if typed)
512        // Column 2: expected appearance of buffer after
513        //           keyboard xliteration.
514        "a", "a",
515        "b", "ab",
516        "t", "abt", // modified for rollback - "aby",
517        "c", "abyc",
518        "t", "abyct", // modified for rollback - "abycy",
519        "h", "abycz",
520        0, "abycz", // null means finishKeyboardTransliteration
521    };
522
523    UParseError parseError;
524    UErrorCode status = U_ZERO_ERROR;
525    Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
526    if (U_FAILURE(status)) {
527        errln("FAIL: RBT constructor failed");
528        return;
529    }
530    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
531    delete t;
532}
533
534void TransliteratorTest::keyboardAux(const Transliterator& t,
535                                     const char* DATA[], int32_t DATA_length) {
536    UErrorCode status = U_ZERO_ERROR;
537    UTransPosition index={0, 0, 0, 0};
538    UnicodeString s;
539    for (int32_t i=0; i<DATA_length; i+=2) {
540        UnicodeString log;
541        if (DATA[i] != 0) {
542            log = s + " + "
543                + DATA[i]
544                + " -> ";
545            t.transliterate(s, index, DATA[i], status);
546        } else {
547            log = s + " => ";
548            t.finishTransliteration(s, index);
549        }
550        // Show the start index '{' and the cursor '|'
551        UnicodeString a, b, c;
552        s.extractBetween(0, index.contextStart, a);
553        s.extractBetween(index.contextStart, index.start, b);
554        s.extractBetween(index.start, s.length(), c);
555        log.append(a).
556            append((UChar)LEFT_BRACE).
557            append(b).
558            append((UChar)PIPE).
559            append(c);
560        if (s == DATA[i+1] && U_SUCCESS(status)) {
561            logln(log);
562        } else {
563            errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
564        }
565    }
566}
567
568void TransliteratorTest::TestArabic(void) {
569// Test disabled for 2.0 until new Arabic transliterator can be written.
570//    /*
571//    const char* DATA[] = {
572//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
573//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
574//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
575//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
576//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
577//                  "\u062c\u0645\u064a\u0644\u0629",
578//    };
579//    */
580//
581//    UChar ar_raw[] = {
582//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
583//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
584//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
585//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
586//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
587//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
588//    };
589//    UnicodeString ar(ar_raw);
590//    UErrorCode status=U_ZERO_ERROR;
591//    UParseError parseError;
592//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
593//    if (t == 0) {
594//        errln("FAIL: createInstance failed");
595//        return;
596//    }
597//    expect(*t, "Arabic", ar);
598//    delete t;
599}
600
601/**
602 * Compose the Kana transliterator forward and reverse and try
603 * some strings that should come out unchanged.
604 */
605void TransliteratorTest::TestCompoundKana(void) {
606    UParseError parseError;
607    UErrorCode status = U_ZERO_ERROR;
608    Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
609    if (t == 0) {
610        dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
611    } else {
612        expect(*t, "aaaaa", "aaaaa");
613        delete t;
614    }
615}
616
617/**
618 * Compose the hex transliterators forward and reverse.
619 */
620void TransliteratorTest::TestCompoundHex(void) {
621    UParseError parseError;
622    UErrorCode status = U_ZERO_ERROR;
623    Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
624    Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
625    Transliterator* transab[] = { a, b };
626    Transliterator* transba[] = { b, a };
627    if (a == 0 || b == 0) {
628        errln("FAIL: construction failed");
629        delete a;
630        delete b;
631        return;
632    }
633    // Do some basic tests of a
634    expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
635    // Do some basic tests of b
636    expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
637
638    Transliterator* ab = new CompoundTransliterator(transab, 2);
639    UnicodeString s("abcde", "");
640    expect(*ab, s, s);
641
642    UnicodeString str(s);
643    a->transliterate(str);
644    Transliterator* ba = new CompoundTransliterator(transba, 2);
645    expect(*ba, str, str);
646
647    delete ab;
648    delete ba;
649    delete a;
650    delete b;
651}
652
653int gTestFilterClassID = 0;
654/**
655 * Used by TestFiltering().
656 */
657class TestFilter : public UnicodeFilter {
658    virtual UnicodeFunctor* clone() const {
659        return new TestFilter(*this);
660    }
661    virtual UBool contains(UChar32 c) const {
662        return c != (UChar)0x0063 /*c*/;
663    }
664    // Stubs
665    virtual UnicodeString& toPattern(UnicodeString& result,
666                                     UBool /*escapeUnprintable*/) const {
667        return result;
668    }
669    virtual UBool matchesIndexValue(uint8_t /*v*/) const {
670        return FALSE;
671    }
672    virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
673public:
674    UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
675};
676
677/**
678 * Do some basic tests of filtering.
679 */
680void TransliteratorTest::TestFiltering(void) {
681    UParseError parseError;
682    UErrorCode status = U_ZERO_ERROR;
683    Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
684    if (hex == 0) {
685        errln("FAIL: createInstance(Any-Hex) failed");
686        return;
687    }
688    hex->adoptFilter(new TestFilter());
689    UnicodeString s("abcde");
690    hex->transliterate(s);
691    UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
692    if (s == exp) {
693        logln(UnicodeString("Ok:   \"") + exp + "\"");
694    } else {
695        logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
696    }
697
698    // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
699    UnicodeFilter *f = hex->orphanFilter();
700    if (f == NULL){
701        errln("FAIL: orphanFilter() should get a UnicodeFilter");
702    } else {
703        delete f;
704    }
705    delete hex;
706}
707
708/**
709 * Test anchors
710 */
711void TransliteratorTest::TestAnchors(void) {
712    expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
713           "aaa",
714           "012");
715    expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
716           "aaa",
717           "012");
718    expect(UnicodeString("^ab  > 01 ;"
719           " ab  > |8 ;"
720           "  b  > k ;"
721           " 8x$ > 45 ;"
722           " 8x  > 77 ;", ""),
723
724           "ababbabxabx",
725           "018k7745");
726    expect(UnicodeString("$s = [z$] ;"
727           "$s{ab    > 01 ;"
728           "   ab    > |8 ;"
729           "    b    > k ;"
730           "   8x}$s > 45 ;"
731           "   8x    > 77 ;", ""),
732
733           "abzababbabxzabxabx",
734           "01z018k45z01x45");
735}
736
737/**
738 * Test pattern quoting and escape mechanisms.
739 */
740void TransliteratorTest::TestPatternQuoting(void) {
741    // Array of 3n items
742    // Each item is <rules>, <input>, <expected output>
743    const UnicodeString DATA[] = {
744        UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
745        UnicodeString(UChar(0x4E01)),
746        "[male adult]"
747    };
748
749    for (int32_t i=0; i<3; i+=3) {
750        logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
751        UParseError parseError;
752        UErrorCode status = U_ZERO_ERROR;
753        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
754        if (U_FAILURE(status)) {
755            errln("RBT constructor failed");
756        } else {
757            expect(*t, DATA[i+1], DATA[i+2]);
758        }
759        delete t;
760    }
761}
762
763/**
764 * Regression test for bugs found in Greek transliteration.
765 */
766void TransliteratorTest::TestJ277(void) {
767    UErrorCode status = U_ZERO_ERROR;
768    UParseError parseError;
769    Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
770    if (gl == NULL) {
771        dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
772        return;
773    }
774
775    UChar sigma = 0x3C3;
776    UChar upsilon = 0x3C5;
777    UChar nu = 0x3BD;
778//    UChar PHI = 0x3A6;
779    UChar alpha = 0x3B1;
780//    UChar omega = 0x3C9;
781//    UChar omicron = 0x3BF;
782//    UChar epsilon = 0x3B5;
783
784    // sigma upsilon nu -> syn
785    UnicodeString syn;
786    syn.append(sigma).append(upsilon).append(nu);
787    expect(*gl, syn, "syn");
788
789    // sigma alpha upsilon nu -> saun
790    UnicodeString sayn;
791    sayn.append(sigma).append(alpha).append(upsilon).append(nu);
792    expect(*gl, sayn, "saun");
793
794    // Again, using a smaller rule set
795    UnicodeString rules(
796                "$alpha   = \\u03B1;"
797                "$nu      = \\u03BD;"
798                "$sigma   = \\u03C3;"
799                "$ypsilon = \\u03C5;"
800                "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
801                "s <>           $sigma;"
802                "a <>           $alpha;"
803                "u <>  $vowel { $ypsilon;"
804                "y <>           $ypsilon;"
805                "n <>           $nu;",
806                "");
807    Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
808    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
809    expect(*mini, syn, "syn");
810    expect(*mini, sayn, "saun");
811    delete mini;
812    mini = NULL;
813
814#if !UCONFIG_NO_FORMATTING
815    // Transliterate the Greek locale data
816    Locale el("el");
817    DateFormatSymbols syms(el, status);
818    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
819    int32_t i, count;
820    const UnicodeString* data = syms.getMonths(count);
821    for (i=0; i<count; ++i) {
822        if (data[i].length() == 0) {
823            continue;
824        }
825        UnicodeString out(data[i]);
826        gl->transliterate(out);
827        UBool ok = TRUE;
828        if (data[i].length() >= 2 && out.length() >= 2 &&
829            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
830            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
831                ok = FALSE;
832            }
833        }
834        if (ok) {
835            logln(prettify(data[i] + " -> " + out));
836        } else {
837            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
838        }
839    }
840#endif
841
842    delete gl;
843}
844
845/**
846 * Prefix, suffix support in hex transliterators
847 */
848void TransliteratorTest::TestJ243(void) {
849    UErrorCode ec = U_ZERO_ERROR;
850
851    // Test default Hex-Any, which should handle
852    // \u, \U, u+, and U+
853    Transliterator *hex =
854        Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
855    if (assertSuccess("getInstance", ec)) {
856        expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
857    }
858    delete hex;
859
860//    // Try a custom Hex-Unicode
861//    // \uXXXX and &#xXXXX;
862//    ec = U_ZERO_ERROR;
863//    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
864//    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
865//           "abcd5fx012&#x00033;");
866//    // Try custom Any-Hex (default is tested elsewhere)
867//    ec = U_ZERO_ERROR;
868//    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
869//    expect(hex3, "012", "&#x30;&#x31;&#x32;");
870}
871
872/**
873 * Parsers need better syntax error messages.
874 */
875void TransliteratorTest::TestJ329(void) {
876
877    struct { UBool containsErrors; const char* rule; } DATA[] = {
878        { FALSE, "a > b; c > d" },
879        { TRUE,  "a > b; no operator; c > d" },
880    };
881    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
882
883    for (int32_t i=0; i<DATA_length; ++i) {
884        UErrorCode status = U_ZERO_ERROR;
885        UParseError parseError;
886        Transliterator *rbt = Transliterator::createFromRules("<ID>",
887                                    DATA[i].rule,
888                                    UTRANS_FORWARD,
889                                    parseError,
890                                    status);
891        UBool gotError = U_FAILURE(status);
892        UnicodeString desc(DATA[i].rule);
893        desc.append(gotError ? " -> error" : " -> no error");
894        if (gotError) {
895            desc = desc + ", ParseError code=" + u_errorName(status) +
896                " line=" + parseError.line +
897                " offset=" + parseError.offset +
898                " context=" + parseError.preContext;
899        }
900        if (gotError == DATA[i].containsErrors) {
901            logln(UnicodeString("Ok:   ") + desc);
902        } else {
903            errln(UnicodeString("FAIL: ") + desc);
904        }
905        delete rbt;
906    }
907}
908
909/**
910 * Test segments and segment references.
911 */
912void TransliteratorTest::TestSegments(void) {
913    // Array of 3n items
914    // Each item is <rules>, <input>, <expected output>
915    UnicodeString DATA[] = {
916        "([a-z]) '.' ([0-9]) > $2 '-' $1",
917        "abc.123.xyz.456",
918        "ab1-c23.xy4-z56",
919
920        // nested
921        "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
922        "a1 b2",
923        "a1.a.1 b2.b.2",
924    };
925    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
926
927    for (int32_t i=0; i<DATA_length; i+=3) {
928        logln("Pattern: " + prettify(DATA[i]));
929        UParseError parseError;
930        UErrorCode status = U_ZERO_ERROR;
931        Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
932        if (U_FAILURE(status)) {
933            errln("FAIL: RBT constructor");
934        } else {
935            expect(*t, DATA[i+1], DATA[i+2]);
936        }
937        delete t;
938    }
939}
940
941/**
942 * Test cursor positioning outside of the key
943 */
944void TransliteratorTest::TestCursorOffset(void) {
945    // Array of 3n items
946    // Each item is <rules>, <input>, <expected output>
947    UnicodeString DATA[] = {
948        "pre {alpha} post > | @ ALPHA ;"
949        "eALPHA > beta ;"
950        "pre {beta} post > BETA @@ | ;"
951        "post > xyz",
952
953        "prealphapost prebetapost",
954
955        "prbetaxyz preBETApost",
956    };
957    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
958
959    for (int32_t i=0; i<DATA_length; i+=3) {
960        logln("Pattern: " + prettify(DATA[i]));
961        UParseError parseError;
962        UErrorCode status = U_ZERO_ERROR;
963        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
964        if (U_FAILURE(status)) {
965            errln("FAIL: RBT constructor");
966        } else {
967            expect(*t, DATA[i+1], DATA[i+2]);
968        }
969        delete t;
970    }
971}
972
973/**
974 * Test zero length and > 1 char length variable values.  Test
975 * use of variable refs in UnicodeSets.
976 */
977void TransliteratorTest::TestArbitraryVariableValues(void) {
978    // Array of 3n items
979    // Each item is <rules>, <input>, <expected output>
980    UnicodeString DATA[] = {
981        "$abe = ab;"
982        "$pat = x[yY]z;"
983        "$ll  = 'a-z';"
984        "$llZ = [$ll];"
985        "$llY = [$ll$pat];"
986        "$emp = ;"
987
988        "$abe > ABE;"
989        "$pat > END;"
990        "$llZ > 1;"
991        "$llY > 2;"
992        "7$emp 8 > 9;"
993        "",
994
995        "ab xYzxyz stY78",
996        "ABE ENDEND 1129",
997    };
998    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
999
1000    for (int32_t i=0; i<DATA_length; i+=3) {
1001        logln("Pattern: " + prettify(DATA[i]));
1002        UParseError parseError;
1003        UErrorCode status = U_ZERO_ERROR;
1004        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1005        if (U_FAILURE(status)) {
1006            errln("FAIL: RBT constructor");
1007        } else {
1008            expect(*t, DATA[i+1], DATA[i+2]);
1009        }
1010        delete t;
1011    }
1012}
1013
1014/**
1015 * Confirm that the contextStart, contextLimit, start, and limit
1016 * behave correctly. J474.
1017 */
1018void TransliteratorTest::TestPositionHandling(void) {
1019    // Array of 3n items
1020    // Each item is <rules>, <input>, <expected output>
1021    const char* DATA[] = {
1022        "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1023        "xtat txtb", // pos 0,9,0,9
1024        "xTTaSS TTxUUb",
1025
1026        "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1027        "xtat txtb", // pos 2,9,3,8
1028        "xtaSS TTxUUb",
1029
1030        "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1031        "xtat txtb", // pos 3,8,3,8
1032        "xtaTT TTxTTb",
1033    };
1034
1035    // Array of 4n positions -- these go with the DATA array
1036    // They are: contextStart, contextLimit, start, limit
1037    int32_t POS[] = {
1038        0, 9, 0, 9,
1039        2, 9, 3, 8,
1040        3, 8, 3, 8,
1041    };
1042
1043    int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1044    for (int32_t i=0; i<n; i++) {
1045        UErrorCode status = U_ZERO_ERROR;
1046        UParseError parseError;
1047        Transliterator *t = Transliterator::createFromRules("<ID>",
1048                                DATA[3*i], UTRANS_FORWARD, parseError, status);
1049        if (U_FAILURE(status)) {
1050            delete t;
1051            errln("FAIL: RBT constructor");
1052            return;
1053        }
1054        UTransPosition pos;
1055        pos.contextStart= POS[4*i];
1056        pos.contextLimit = POS[4*i+1];
1057        pos.start = POS[4*i+2];
1058        pos.limit = POS[4*i+3];
1059        UnicodeString rsource(DATA[3*i+1]);
1060        t->transliterate(rsource, pos, status);
1061        if (U_FAILURE(status)) {
1062            delete t;
1063            errln("FAIL: transliterate");
1064            return;
1065        }
1066        t->finishTransliteration(rsource, pos);
1067        expectAux(DATA[3*i],
1068                  DATA[3*i+1],
1069                  rsource,
1070                  DATA[3*i+2]);
1071        delete t;
1072    }
1073}
1074
1075/**
1076 * Test the Hiragana-Katakana transliterator.
1077 */
1078void TransliteratorTest::TestHiraganaKatakana(void) {
1079    UParseError parseError;
1080    UErrorCode status = U_ZERO_ERROR;
1081    Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1082    Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1083    if (hk == 0 || kh == 0) {
1084        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1085        delete hk;
1086        delete kh;
1087        return;
1088    }
1089
1090    // Array of 3n items
1091    // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1092    const char* DATA[] = {
1093        "both",
1094        "\\u3042\\u3090\\u3099\\u3092\\u3050",
1095        "\\u30A2\\u30F8\\u30F2\\u30B0",
1096
1097        "kh",
1098        "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1099        "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1100    };
1101    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1102
1103    for (int32_t i=0; i<DATA_length; i+=3) {
1104        UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1105        UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1106        switch (*DATA[i]) {
1107        case 0x68: //'h': // Hiragana-Katakana
1108            expect(*hk, h, k);
1109            break;
1110        case 0x6B: //'k': // Katakana-Hiragana
1111            expect(*kh, k, h);
1112            break;
1113        case 0x62: //'b': // both
1114            expect(*hk, h, k);
1115            expect(*kh, k, h);
1116            break;
1117        }
1118    }
1119    delete hk;
1120    delete kh;
1121}
1122
1123/**
1124 * Test cloning / copy constructor of RBT.
1125 */
1126void TransliteratorTest::TestCopyJ476(void) {
1127    // The real test here is what happens when the destructors are
1128    // called.  So we let one object get destructed, and check to
1129    // see that its copy still works.
1130    Transliterator *t2 = 0;
1131    {
1132        UParseError parseError;
1133        UErrorCode status = U_ZERO_ERROR;
1134        Transliterator *t1 = Transliterator::createFromRules("t1",
1135            "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1136        if (U_FAILURE(status)) {
1137            errln("FAIL: RBT constructor");
1138            return;
1139        }
1140        t2 = t1->clone(); // Call copy constructor under the covers.
1141        expect(*t1, "abcfoofoo", "ABcbar");
1142        delete t1;
1143    }
1144    expect(*t2, "abcfoofoo", "ABcbar");
1145    delete t2;
1146}
1147
1148/**
1149 * Test inter-Indic transliterators.  These are composed.
1150 * ICU4C Jitterbug 483.
1151 */
1152void TransliteratorTest::TestInterIndic(void) {
1153    UnicodeString ID("Devanagari-Gujarati", "");
1154    UErrorCode status = U_ZERO_ERROR;
1155    UParseError parseError;
1156    Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1157    if (dg == 0) {
1158        dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1159        return;
1160    }
1161    UnicodeString id = dg->getID();
1162    if (id != ID) {
1163        errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1164    }
1165    UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1166    UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1167    expect(*dg, dev, guj);
1168    delete dg;
1169}
1170
1171/**
1172 * Test filter syntax in IDs. (J918)
1173 */
1174void TransliteratorTest::TestFilterIDs(void) {
1175    // Array of 3n strings:
1176    // <id>, <inverse id>, <input>, <expected output>
1177    const char* DATA[] = {
1178        "[aeiou]Any-Hex", // ID
1179        "[aeiou]Hex-Any", // expected inverse ID
1180        "quizzical",      // src
1181        "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1182
1183        "[aeiou]Any-Hex;[^5]Hex-Any",
1184        "[^5]Any-Hex;[aeiou]Hex-Any",
1185        "quizzical",
1186        "q\\u0075izzical",
1187
1188        "[abc]Null",
1189        "[abc]Null",
1190        "xyz",
1191        "xyz",
1192    };
1193    enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1194
1195    for (int i=0; i<DATA_length; i+=4) {
1196        UnicodeString ID(DATA[i], "");
1197        UnicodeString uID(DATA[i+1], "");
1198        UnicodeString data2(DATA[i+2], "");
1199        UnicodeString data3(DATA[i+3], "");
1200        UParseError parseError;
1201        UErrorCode status = U_ZERO_ERROR;
1202        Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1203        if (t == 0) {
1204            errln("FAIL: createInstance(" + ID + ") returned NULL");
1205            return;
1206        }
1207        expect(*t, data2, data3);
1208
1209        // Check the ID
1210        if (ID != t->getID()) {
1211            errln("FAIL: createInstance(" + ID + ").getID() => " +
1212                  t->getID());
1213        }
1214
1215        // Check the inverse
1216        Transliterator *u = t->createInverse(status);
1217        if (u == 0) {
1218            errln("FAIL: " + ID + ".createInverse() returned NULL");
1219        } else if (u->getID() != uID) {
1220            errln("FAIL: " + ID + ".createInverse().getID() => " +
1221                  u->getID() + ", expected " + uID);
1222        }
1223
1224        delete t;
1225        delete u;
1226    }
1227}
1228
1229/**
1230 * Test the case mapping transliterators.
1231 */
1232void TransliteratorTest::TestCaseMap(void) {
1233    UParseError parseError;
1234    UErrorCode status = U_ZERO_ERROR;
1235    Transliterator* toUpper =
1236        Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1237    Transliterator* toLower =
1238        Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239    Transliterator* toTitle =
1240        Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241    if (toUpper==0 || toLower==0 || toTitle==0) {
1242        errln("FAIL: createInstance returned NULL");
1243        delete toUpper;
1244        delete toLower;
1245        delete toTitle;
1246        return;
1247    }
1248
1249    expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1250           "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1251    expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1252           "the quick brown foX jumped over the lazY dogs.");
1253    expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1254           "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1255
1256    delete toUpper;
1257    delete toLower;
1258    delete toTitle;
1259}
1260
1261/**
1262 * Test the name mapping transliterators.
1263 */
1264void TransliteratorTest::TestNameMap(void) {
1265    UParseError parseError;
1266    UErrorCode status = U_ZERO_ERROR;
1267    Transliterator* uni2name =
1268        Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1269    Transliterator* name2uni =
1270        Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1271    if (uni2name==0 || name2uni==0) {
1272        errln("FAIL: createInstance returned NULL");
1273        delete uni2name;
1274        delete name2uni;
1275        return;
1276    }
1277
1278    // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1279    expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1280           CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1281    expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1282           CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1283
1284    delete uni2name;
1285    delete name2uni;
1286
1287    // round trip
1288    Transliterator* t =
1289        Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1290    if (t==0) {
1291        errln("FAIL: createInstance returned NULL");
1292        delete t;
1293        return;
1294    }
1295
1296    // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1297    UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1298    expect(*t, s, s);
1299    delete t;
1300}
1301
1302/**
1303 * Test liberalized ID syntax.  1006c
1304 */
1305void TransliteratorTest::TestLiberalizedID(void) {
1306    // Some test cases have an expected getID() value of NULL.  This
1307    // means I have disabled the test case for now.  This stuff is
1308    // still under development, and I haven't decided whether to make
1309    // getID() return canonical case yet.  It will all get rewritten
1310    // with the move to Source-Target/Variant IDs anyway. [aliu]
1311    const char* DATA[] = {
1312        "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1313        "  Null  ", "Null", "whitespace",
1314        " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1315        "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1316    };
1317    const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1318    UParseError parseError;
1319    UErrorCode status= U_ZERO_ERROR;
1320    for (int32_t i=0; i<DATA_length; i+=3) {
1321        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1322        if (t == 0) {
1323            dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1324                  " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1325        } else {
1326            UnicodeString exp;
1327            if (DATA[i+1]) {
1328                exp = UnicodeString(DATA[i+1], "");
1329            }
1330            // Don't worry about getID() if the expected char*
1331            // is NULL -- see above.
1332            if (exp.length() == 0 || exp == t->getID()) {
1333                logln(UnicodeString("Ok: ") + DATA[i+2] +
1334                      " create ID \"" + DATA[i] + "\" => \"" +
1335                      exp + "\"");
1336            } else {
1337                errln(UnicodeString("FAIL: ") + DATA[i+2] +
1338                      " create ID \"" + DATA[i] + "\" => \"" +
1339                      t->getID() + "\", exp \"" + exp + "\"");
1340            }
1341            delete t;
1342        }
1343    }
1344}
1345
1346/* test for Jitterbug 912 */
1347void TransliteratorTest::TestCreateInstance(){
1348    const char* FORWARD = "F";
1349    const char* REVERSE = "R";
1350    const char* DATA[] = {
1351        // Column 1: id
1352        // Column 2: direction
1353        // Column 3: expected ID, or "" if expect failure
1354        "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1355
1356        // JB#2689: bad compound causes crash
1357        "InvalidSource-InvalidTarget", FORWARD, "",
1358        "InvalidSource-InvalidTarget", REVERSE, "",
1359        "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1360        "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1361        "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1362        "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1363
1364        NULL
1365    };
1366
1367    for (int32_t i=0; DATA[i]; i+=3) {
1368        UParseError err;
1369        UErrorCode ec = U_ZERO_ERROR;
1370        UnicodeString id(DATA[i]);
1371        UTransDirection dir = (DATA[i+1]==FORWARD)?
1372            UTRANS_FORWARD:UTRANS_REVERSE;
1373        UnicodeString expID(DATA[i+2]);
1374        Transliterator* t =
1375            Transliterator::createInstance(id,dir,err,ec);
1376        UnicodeString newID;
1377        if (t) {
1378            newID = t->getID();
1379        }
1380        UBool ok = (newID == expID);
1381        if (!t) {
1382            newID = u_errorName(ec);
1383        }
1384        if (ok) {
1385            logln((UnicodeString)"Ok: createInstance(" +
1386                  id + "," + DATA[i+1] + ") => " + newID);
1387        } else {
1388            dataerrln((UnicodeString)"FAIL: createInstance(" +
1389                  id + "," + DATA[i+1] + ") => " + newID +
1390                  ", expected " + expID);
1391        }
1392        delete t;
1393    }
1394}
1395
1396/**
1397 * Test the normalization transliterator.
1398 */
1399void TransliteratorTest::TestNormalizationTransliterator() {
1400    // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1401    // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1402    const char* CANON[] = {
1403        // Input               Decomposed            Composed
1404        "cat",                "cat",                "cat"               ,
1405        "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1406
1407        "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1408        "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1409
1410        "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1411        "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1412        "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1413
1414        "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1415        "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1416
1417        "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1418        "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1419        "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1420
1421        "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1422        "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1423
1424        "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1425        "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1426
1427        "Henry IV",           "Henry IV",           "Henry IV"          ,
1428        "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1429
1430        "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1431        "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1432        "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1433        "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1434        "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1435
1436        "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1437        0 // end
1438    };
1439
1440    const char* COMPAT[] = {
1441        // Input               Decomposed            Composed
1442        "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1443
1444        "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1445        "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1446
1447        "Henry IV",           "Henry IV",           "Henry IV"          ,
1448        "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1449
1450        "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1451        "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1452
1453        "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1454        0 // end
1455    };
1456
1457    int32_t i;
1458    UParseError parseError;
1459    UErrorCode status = U_ZERO_ERROR;
1460    Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1461    Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1462    if (!NFD || !NFC) {
1463        dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1464        delete NFD;
1465        delete NFC;
1466        return;
1467    }
1468    for (i=0; CANON[i]; i+=3) {
1469        UnicodeString in = CharsToUnicodeString(CANON[i]);
1470        UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1471        UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1472        expect(*NFD, in, expd);
1473        expect(*NFC, in, expc);
1474    }
1475    delete NFD;
1476    delete NFC;
1477
1478    Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1479    Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1480    if (!NFKD || !NFKC) {
1481        errln("FAIL: createInstance failed");
1482        delete NFKD;
1483        delete NFKC;
1484        return;
1485    }
1486    for (i=0; COMPAT[i]; i+=3) {
1487        UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1488        UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1489        UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1490        expect(*NFKD, in, expkd);
1491        expect(*NFKC, in, expkc);
1492    }
1493    delete NFKD;
1494    delete NFKC;
1495
1496    UParseError pe;
1497    status = U_ZERO_ERROR;
1498    Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1499                                                       UTRANS_FORWARD,
1500                                                       pe, status);
1501    if (t == 0) {
1502        errln("FAIL: createInstance failed");
1503    }
1504    expect(*t, CharsToUnicodeString("\\u010dx"),
1505           CharsToUnicodeString("c\\u030C"));
1506    delete t;
1507}
1508
1509/**
1510 * Test compound RBT rules.
1511 */
1512void TransliteratorTest::TestCompoundRBT(void) {
1513    // Careful with spacing and ';' here:  Phrase this exactly
1514    // as toRules() is going to return it.  If toRules() changes
1515    // with regard to spacing or ';', then adjust this string.
1516    UnicodeString rule("::Hex-Any;\n"
1517                       "::Any-Lower;\n"
1518                       "a > '.A.';\n"
1519                       "b > '.B.';\n"
1520                       "::[^t]Any-Upper;", "");
1521    UParseError parseError;
1522    UErrorCode status = U_ZERO_ERROR;
1523    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1524    if (t == 0) {
1525        errln("FAIL: createFromRules failed");
1526        return;
1527    }
1528    expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1529           "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1530    UnicodeString r;
1531    t->toRules(r, TRUE);
1532    if (r == rule) {
1533        logln((UnicodeString)"OK: toRules() => " + r);
1534    } else {
1535        errln((UnicodeString)"FAIL: toRules() => " + r +
1536              ", expected " + rule);
1537    }
1538    delete t;
1539
1540    // Now test toRules
1541    t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1542    if (t == 0) {
1543        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1544        return;
1545    }
1546    UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1547    t->toRules(r, TRUE);
1548    if (r != exp) {
1549        errln((UnicodeString)"FAIL: toRules() => " + r +
1550              ", expected " + exp);
1551    } else {
1552        logln((UnicodeString)"OK: toRules() => " + r);
1553    }
1554    delete t;
1555
1556    // Round trip the result of toRules
1557    t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1558    if (t == 0) {
1559        errln("FAIL: createFromRules #2 failed");
1560        return;
1561    } else {
1562        logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1563    }
1564
1565    // Test toRules again
1566    t->toRules(r, TRUE);
1567    if (r != exp) {
1568        errln((UnicodeString)"FAIL: toRules() => " + r +
1569              ", expected " + exp);
1570    } else {
1571        logln((UnicodeString)"OK: toRules() => " + r);
1572    }
1573
1574    delete t;
1575
1576    // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1577    // to what the regenerated ID will look like.
1578    UnicodeString id("Upper(Lower);(NFKC)", "");
1579    t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1580    if (t == 0) {
1581        errln("FAIL: createInstance #2 failed");
1582        return;
1583    }
1584    if (t->getID() == id) {
1585        logln((UnicodeString)"OK: created " + id);
1586    } else {
1587        errln((UnicodeString)"FAIL: createInstance(" + id +
1588              ").getID() => " + t->getID());
1589    }
1590
1591    Transliterator *u = t->createInverse(status);
1592    if (u == 0) {
1593        errln("FAIL: createInverse failed");
1594        delete t;
1595        return;
1596    }
1597    exp = "NFKC();Lower(Upper)";
1598    if (u->getID() == exp) {
1599        logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1600              u->getID());
1601    } else {
1602        errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1603              u->getID());
1604    }
1605    delete t;
1606    delete u;
1607}
1608
1609/**
1610 * Compound filter semantics were orginially not implemented
1611 * correctly.  Originally, each component filter f(i) is replaced by
1612 * f'(i) = f(i) && g, where g is the filter for the compound
1613 * transliterator.
1614 *
1615 * From Mark:
1616 *
1617 * Suppose and I have a transliterator X. Internally X is
1618 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1619 *
1620 * The compound should convert all greek characters (through latin) to
1621 * cyrillic, then lowercase the result. The filter should say "don't
1622 * touch 'A' in the original". But because an intermediate result
1623 * happens to go through "A", the Greek Alpha gets hung up.
1624 */
1625void TransliteratorTest::TestCompoundFilter(void) {
1626    UParseError parseError;
1627    UErrorCode status = U_ZERO_ERROR;
1628    Transliterator *t = Transliterator::createInstance
1629        ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1630    if (t == 0) {
1631        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1632        return;
1633    }
1634    t->adoptFilter(new UnicodeSet("[^A]", status));
1635    if (U_FAILURE(status)) {
1636        errln("FAIL: UnicodeSet ct failed");
1637        delete t;
1638        return;
1639    }
1640
1641    // Only the 'A' at index 1 should remain unchanged
1642    expect(*t,
1643           CharsToUnicodeString("BA\\u039A\\u0391"),
1644           CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1645    delete t;
1646}
1647
1648void TransliteratorTest::TestRemove(void) {
1649    UParseError parseError;
1650    UErrorCode status = U_ZERO_ERROR;
1651    Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1652    if (t == 0) {
1653        errln("FAIL: createInstance failed");
1654        return;
1655    }
1656
1657    expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1658
1659    // extra test for RemoveTransliterator::clone(), which at one point wasn't
1660    // duplicating the filter
1661    Transliterator* t2 = t->clone();
1662    expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1663
1664    delete t;
1665    delete t2;
1666}
1667
1668void TransliteratorTest::TestToRules(void) {
1669    const char* RBT = "rbt";
1670    const char* SET = "set";
1671    static const char* DATA[] = {
1672        RBT,
1673        "$a=\\u4E61; [$a] > A;",
1674        "[\\u4E61] > A;",
1675
1676        RBT,
1677        "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1678        "[[:Zs:][:Zl:]]{a} > A;",
1679
1680        SET,
1681        "[[:Zs:][:Zl:]]",
1682        "[[:Zs:][:Zl:]]",
1683
1684        SET,
1685        "[:Ps:]",
1686        "[:Ps:]",
1687
1688        SET,
1689        "[:L:]",
1690        "[:L:]",
1691
1692        SET,
1693        "[[:L:]-[A]]",
1694        "[[:L:]-[A]]",
1695
1696        SET,
1697        "[~[:Lu:][:Ll:]]",
1698        "[~[:Lu:][:Ll:]]",
1699
1700        SET,
1701        "[~[a-z]]",
1702        "[~[a-z]]",
1703
1704        RBT,
1705        "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1706        "[^[:Zs:]]{a} > A;",
1707
1708        RBT,
1709        "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1710        "[[a-z]-[:Zs:]]{a} > A;",
1711
1712        RBT,
1713        "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1714        "[[:Zs:]&[a-z]]{a} > A;",
1715
1716        RBT,
1717        "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1718        "[x[:Zs:]]{a} > A;",
1719
1720        RBT,
1721        "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1722        "$macron = \\u0304 ;"
1723        "$evowel = [aeiouyAEIOUY] ;"
1724        "$iotasub = \\u0345 ;"
1725        "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1726        "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1727
1728        RBT,
1729        "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730        "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1731    };
1732    static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1733
1734    for (int32_t d=0; d < DATA_length; d+=3) {
1735        if (DATA[d] == RBT) {
1736            // Transliterator test
1737            UParseError parseError;
1738            UErrorCode status = U_ZERO_ERROR;
1739            Transliterator *t = Transliterator::createFromRules("ID",
1740                                                                UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1741            if (t == 0) {
1742                dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1743                return;
1744            }
1745            UnicodeString rules, escapedRules;
1746            t->toRules(rules, FALSE);
1747            t->toRules(escapedRules, TRUE);
1748            UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1749            UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1750            if (rules == expRules) {
1751                logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1752                      " => " + rules);
1753            } else {
1754                errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1755                      " => " + rules + ", exp " + expRules);
1756            }
1757            if (escapedRules == expEscapedRules) {
1758                logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1759                      " => " + escapedRules);
1760            } else {
1761                errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1762                      " => " + escapedRules + ", exp " + expEscapedRules);
1763            }
1764            delete t;
1765
1766        } else {
1767            // UnicodeSet test
1768            UErrorCode status = U_ZERO_ERROR;
1769            UnicodeString pat(DATA[d+1], -1, US_INV);
1770            UnicodeString expToPat(DATA[d+2], -1, US_INV);
1771            UnicodeSet set(pat, status);
1772            if (U_FAILURE(status)) {
1773                errln("FAIL: UnicodeSet ct failed");
1774                return;
1775            }
1776            // Adjust spacing etc. as necessary.
1777            UnicodeString toPat;
1778            set.toPattern(toPat);
1779            if (expToPat == toPat) {
1780                logln((UnicodeString)"Ok: " + pat +
1781                      " => " + toPat);
1782            } else {
1783                errln((UnicodeString)"FAIL: " + pat +
1784                      " => " + prettify(toPat, TRUE) +
1785                      ", exp " + prettify(pat, TRUE));
1786            }
1787        }
1788    }
1789}
1790
1791void TransliteratorTest::TestContext() {
1792    UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1793    expect("de > x; {d}e > y;",
1794           "de",
1795           "ye",
1796           &pos);
1797
1798    expect("ab{c} > z;",
1799           "xadabdabcy",
1800           "xadabdabzy");
1801}
1802
1803void TransliteratorTest::TestSupplemental() {
1804
1805    expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1806                                "a > $a; $s > i;"),
1807           CharsToUnicodeString("ab\\U0001030Fx"),
1808           CharsToUnicodeString("\\U00010300bix"));
1809
1810    expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1811                                "$b=[A-Z\\U00010400-\\U0001044D];"
1812                                "($a)($b) > $2 $1;"),
1813           CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1814           CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1815
1816    // k|ax\\U00010300xm
1817
1818    // k|a\\U00010400\\U00010300xm
1819    // ky|\\U00010400\\U00010300xm
1820    // ky\\U00010400|\\U00010300xm
1821
1822    // ky\\U00010400|\\U00010300\\U00010400m
1823    // ky\\U00010400y|\\U00010400m
1824    expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1825                                "$a {x} > | @ \\U00010400;"
1826                                "{$a} [^\\u0000-\\uFFFF] > y;"),
1827           CharsToUnicodeString("kax\\U00010300xm"),
1828           CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1829
1830    expectT("Any-Name",
1831           CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1832           UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1833
1834    expectT("Any-Hex/Unicode",
1835           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836           UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1837
1838    expectT("Any-Hex/C",
1839           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840           UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1841
1842    expectT("Any-Hex/Perl",
1843           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844           UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1845
1846    expectT("Any-Hex/Java",
1847           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848           UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1849
1850    expectT("Any-Hex/XML",
1851           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852           "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1853
1854    expectT("Any-Hex/XML10",
1855           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1856           "&#66352;&#1113856;&#917601;&#160;");
1857
1858    expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1859           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1860           CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1861}
1862
1863void TransliteratorTest::TestQuantifier() {
1864
1865    // Make sure @ in a quantified anteContext works
1866    expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1867           "AAAAAb",
1868           "aaa(aac)");
1869
1870    // Make sure @ in a quantified postContext works
1871    expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1872           "baaaaa",
1873           "caa(aaa)");
1874
1875    // Make sure @ in a quantified postContext with seg ref works
1876    expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1877           "baaaaa",
1878           "baa(aaa)");
1879
1880    // Make sure @ past ante context doesn't enter ante context
1881    UTransPosition pos = {0, 5, 3, 5};
1882    expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1883           "xxxab",
1884           "xxx(ac)",
1885           &pos);
1886
1887    // Make sure @ past post context doesn't pass limit
1888    UTransPosition pos2 = {0, 4, 0, 2};
1889    expect("{b} a+ > c @@ |; x > y; a > A;",
1890           "baxx",
1891           "caxx",
1892           &pos2);
1893
1894    // Make sure @ past post context doesn't enter post context
1895    expect("{b} a+ > c @@ |; x > y; a > A;",
1896           "baxx",
1897           "cayy");
1898
1899    expect("(ab)? c > d;",
1900           "c abc ababc",
1901           "d d abd");
1902
1903    // NOTE: The (ab)+ when referenced just yields a single "ab",
1904    // not the full sequence of them.  This accords with perl behavior.
1905    expect("(ab)+ {x} > '(' $1 ')';",
1906           "x abx ababxy",
1907           "x ab(ab) abab(ab)y");
1908
1909    expect("b+ > x;",
1910           "ac abc abbc abbbc",
1911           "ac axc axc axc");
1912
1913    expect("[abc]+ > x;",
1914           "qac abrc abbcs abtbbc",
1915           "qx xrx xs xtx");
1916
1917    expect("q{(ab)+} > x;",
1918           "qa qab qaba qababc qaba",
1919           "qa qx qxa qxc qxa");
1920
1921    expect("q(ab)* > x;",
1922           "qa qab qaba qababc",
1923           "xa x xa xc");
1924
1925    // NOTE: The (ab)+ when referenced just yields a single "ab",
1926    // not the full sequence of them.  This accords with perl behavior.
1927    expect("q(ab)* > '(' $1 ')';",
1928           "qa qab qaba qababc",
1929           "()a (ab) (ab)a (ab)c");
1930
1931    // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1932    // quoted string
1933    expect("'ab'+ > x;",
1934           "bb ab ababb",
1935           "bb x xb");
1936
1937    // $foo+ and $foo* -- the quantifier should apply to the entire
1938    // variable reference
1939    expect("$var = ab; $var+ > x;",
1940           "bb ab ababb",
1941           "bb x xb");
1942}
1943
1944class TestTrans : public Transliterator {
1945public:
1946    TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1947    }
1948    virtual Transliterator* clone(void) const {
1949        return new TestTrans(getID());
1950    }
1951    virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1952        UBool /*isIncremental*/) const
1953    {
1954        offsets.start = offsets.limit;
1955    }
1956    virtual UClassID getDynamicClassID() const;
1957    static UClassID U_EXPORT2 getStaticClassID();
1958};
1959UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1960
1961/**
1962 * Test Source-Target/Variant.
1963 */
1964void TransliteratorTest::TestSTV(void) {
1965    int32_t ns = Transliterator::countAvailableSources();
1966    if (ns < 0 || ns > 255) {
1967        errln((UnicodeString)"FAIL: Bad source count: " + ns);
1968        return;
1969    }
1970    int32_t i, j;
1971    for (i=0; i<ns; ++i) {
1972        UnicodeString source;
1973        Transliterator::getAvailableSource(i, source);
1974        logln((UnicodeString)"" + i + ": " + source);
1975        if (source.length() == 0) {
1976            errln("FAIL: empty source");
1977            continue;
1978        }
1979        int32_t nt = Transliterator::countAvailableTargets(source);
1980        if (nt < 0 || nt > 255) {
1981            errln((UnicodeString)"FAIL: Bad target count: " + nt);
1982            continue;
1983        }
1984        for (int32_t j=0; j<nt; ++j) {
1985            UnicodeString target;
1986            Transliterator::getAvailableTarget(j, source, target);
1987            logln((UnicodeString)" " + j + ": " + target);
1988            if (target.length() == 0) {
1989                errln("FAIL: empty target");
1990                continue;
1991            }
1992            int32_t nv = Transliterator::countAvailableVariants(source, target);
1993            if (nv < 0 || nv > 255) {
1994                errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1995                continue;
1996            }
1997            for (int32_t k=0; k<nv; ++k) {
1998                UnicodeString variant;
1999                Transliterator::getAvailableVariant(k, source, target, variant);
2000                if (variant.length() == 0) {
2001                    logln((UnicodeString)"  " + k + ": <empty>");
2002                } else {
2003                    logln((UnicodeString)"  " + k + ": " + variant);
2004                }
2005            }
2006        }
2007    }
2008
2009    // Test registration
2010    const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011    const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2012    const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2013    for (i=0; i<3; ++i) {
2014        Transliterator *t = new TestTrans(IDS[i]);
2015        if (t == 0) {
2016            errln("FAIL: out of memory");
2017            return;
2018        }
2019        if (t->getID() != IDS[i]) {
2020            errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2021            delete t;
2022            return;
2023        }
2024        Transliterator::registerInstance(t);
2025        UErrorCode status = U_ZERO_ERROR;
2026        t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2027        if (t == NULL) {
2028            errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2029                  IDS[i]);
2030        } else {
2031            logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2032                  IDS[i]);
2033            delete t;
2034        }
2035        Transliterator::unregister(IDS[i]);
2036        t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2037        if (t != NULL) {
2038            errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2039                  IDS[i]);
2040            delete t;
2041        }
2042    }
2043
2044    // Make sure getAvailable API reflects removal
2045    int32_t n = Transliterator::countAvailableIDs();
2046    for (i=0; i<n; ++i) {
2047        UnicodeString id = Transliterator::getAvailableID(i);
2048        for (j=0; j<3; ++j) {
2049            if (id.caseCompare(FULL_IDS[j],0)==0) {
2050                errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2051            }
2052        }
2053    }
2054    n = Transliterator::countAvailableTargets("Any");
2055    for (i=0; i<n; ++i) {
2056        UnicodeString t;
2057        Transliterator::getAvailableTarget(i, "Any", t);
2058        if (t.caseCompare(IDS[0],0)==0) {
2059            errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2060        }
2061    }
2062    n = Transliterator::countAvailableSources();
2063    for (i=0; i<n; ++i) {
2064        UnicodeString s;
2065        Transliterator::getAvailableSource(i, s);
2066        for (j=0; j<3; ++j) {
2067            if (SOURCES[j] == NULL) continue;
2068            if (s.caseCompare(SOURCES[j],0)==0) {
2069                errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2070            }
2071        }
2072    }
2073}
2074
2075/**
2076 * Test inverse of Greek-Latin; Title()
2077 */
2078void TransliteratorTest::TestCompoundInverse(void) {
2079    UParseError parseError;
2080    UErrorCode status = U_ZERO_ERROR;
2081    Transliterator *t = Transliterator::createInstance
2082        ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2083    if (t == 0) {
2084        dataerrln("FAIL: createInstance - %s", u_errorName(status));
2085        return;
2086    }
2087    UnicodeString exp("(Title);Latin-Greek");
2088    if (t->getID() == exp) {
2089        logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2090              t->getID());
2091    } else {
2092        errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2093              t->getID() + "\", expected \"" + exp + "\"");
2094    }
2095    delete t;
2096}
2097
2098/**
2099 * Test NFD chaining with RBT
2100 */
2101void TransliteratorTest::TestNFDChainRBT() {
2102    UParseError pe;
2103    UErrorCode ec = U_ZERO_ERROR;
2104    Transliterator* t = Transliterator::createFromRules(
2105                               "TEST", "::NFD; aa > Q; a > q;",
2106                               UTRANS_FORWARD, pe, ec);
2107    if (t == NULL || U_FAILURE(ec)) {
2108        dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2109        return;
2110    }
2111    expect(*t, "aa", "Q");
2112    delete t;
2113
2114    // TEMPORARY TESTS -- BEING DEBUGGED
2115//=-    UnicodeString s, s2;
2116//=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2117//=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2118//=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2119//=-    expect(*t, s, s2);
2120//=-    delete t;
2121//=-
2122//=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2123//=-    expect(*t, s2, s);
2124//=-    delete t;
2125//=-
2126//=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2127//=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2128//=-    expect(*t, s, s);
2129//=-    delete t;
2130
2131//    const char* source[] = {
2132//        /*
2133//        "\\u015Br\\u012Bmad",
2134//        "bhagavadg\\u012Bt\\u0101",
2135//        "adhy\\u0101ya",
2136//        "arjuna",
2137//        "vi\\u1E63\\u0101da",
2138//        "y\\u014Dga",
2139//        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2140//        "uv\\u0101cr\\u0325",
2141//        */
2142//        "rmk\\u1E63\\u0113t",
2143//      //"dharmak\\u1E63\\u0113tr\\u0113",
2144//        /*
2145//        "kuruk\\u1E63\\u0113tr\\u0113",
2146//        "samav\\u0113t\\u0101",
2147//        "yuyutsava-\\u1E25",
2148//        "m\\u0101mak\\u0101-\\u1E25",
2149//     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2150//        "kimakurvata",
2151//        "san\\u0304java",
2152//        */
2153//
2154//        0
2155//    };
2156//    const char* expected[] = {
2157//        /*
2158//        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2159//        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2160//        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2161//        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2162//        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2163//        "\\u092f\\u094b\\u0917",
2164//        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2165//        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2166//        */
2167//        "\\u0927",
2168//        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2169//        /*
2170//        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171//        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2172//        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2173//        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2174//    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2175//        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2176//        "\\u0938\\u0902\\u091c\\u0935",
2177//        */
2178//        0
2179//    };
2180//    UErrorCode status = U_ZERO_ERROR;
2181//    UParseError parseError;
2182//    UnicodeString message;
2183//    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2184//    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2185//    if(U_FAILURE(status)){
2186//        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2187//        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2188//        delete latinToDevToLatin;
2189//        delete devToLatinToDev;
2190//        return;
2191//    }
2192//    UnicodeString gotResult;
2193//    for(int i= 0; source[i] != 0; i++){
2194//        gotResult = source[i];
2195//        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2196//        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2197//    }
2198//    delete latinToDevToLatin;
2199//    delete devToLatinToDev;
2200}
2201
2202/**
2203 * Inverse of "Null" should be "Null". (J21)
2204 */
2205void TransliteratorTest::TestNullInverse() {
2206    UParseError pe;
2207    UErrorCode ec = U_ZERO_ERROR;
2208    Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2209    if (t == 0 || U_FAILURE(ec)) {
2210        errln("FAIL: createInstance");
2211        return;
2212    }
2213    Transliterator *u = t->createInverse(ec);
2214    if (u == 0 || U_FAILURE(ec)) {
2215        errln("FAIL: createInverse");
2216        delete t;
2217        return;
2218    }
2219    if (u->getID() != "Null") {
2220        errln("FAIL: Inverse of Null should be Null");
2221    }
2222    delete t;
2223    delete u;
2224}
2225
2226/**
2227 * Check ID of inverse of alias. (J22)
2228 */
2229void TransliteratorTest::TestAliasInverseID() {
2230    UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2231    UParseError pe;
2232    UErrorCode ec = U_ZERO_ERROR;
2233    Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2234    if (t == 0 || U_FAILURE(ec)) {
2235        dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2236        return;
2237    }
2238    Transliterator *u = t->createInverse(ec);
2239    if (u == 0 || U_FAILURE(ec)) {
2240        errln("FAIL: createInverse");
2241        delete t;
2242        return;
2243    }
2244    UnicodeString exp = "Hangul-Latin";
2245    UnicodeString got = u->getID();
2246    if (got != exp) {
2247        errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2248              ", expected " + exp);
2249    }
2250    delete t;
2251    delete u;
2252}
2253
2254/**
2255 * Test IDs of inverses of compound transliterators. (J20)
2256 */
2257void TransliteratorTest::TestCompoundInverseID() {
2258    UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2259    UParseError pe;
2260    UErrorCode ec = U_ZERO_ERROR;
2261    Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2262    if (t == 0 || U_FAILURE(ec)) {
2263        dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2264        return;
2265    }
2266    Transliterator *u = t->createInverse(ec);
2267    if (u == 0 || U_FAILURE(ec)) {
2268        errln("FAIL: createInverse");
2269        delete t;
2270        return;
2271    }
2272    UnicodeString exp = "NFD(NFC);Jamo-Latin";
2273    UnicodeString got = u->getID();
2274    if (got != exp) {
2275        errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2276              ", expected " + exp);
2277    }
2278    delete t;
2279    delete u;
2280}
2281
2282/**
2283 * Test undefined variable.
2284
2285 */
2286void TransliteratorTest::TestUndefinedVariable() {
2287    UnicodeString rule = "$initial } a <> \\u1161;";
2288    UParseError pe;
2289    UErrorCode ec = U_ZERO_ERROR;
2290    Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2291    delete t;
2292    if (U_FAILURE(ec)) {
2293        logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2294              u_errorName(ec));
2295        return;
2296    }
2297    errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2298          u_errorName(ec));
2299}
2300
2301/**
2302 * Test empty context.
2303 */
2304void TransliteratorTest::TestEmptyContext() {
2305    expect(" { a } > b;", "xay a ", "xby b ");
2306}
2307
2308/**
2309* Test compound filter ID syntax
2310*/
2311void TransliteratorTest::TestCompoundFilterID(void) {
2312    static const char* DATA[] = {
2313        // Col. 1 = ID or rule set (latter must start with #)
2314
2315        // = columns > 1 are null if expect col. 1 to be illegal =
2316
2317        // Col. 2 = direction, "F..." or "R..."
2318        // Col. 3 = source string
2319        // Col. 4 = exp result
2320
2321        "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2322        "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2323        "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2324        "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2325        "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2326        "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327        NULL,
2328    };
2329
2330    for (int32_t i=0; DATA[i]; i+=4) {
2331        UnicodeString id = CharsToUnicodeString(DATA[i]);
2332        UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2333            UTRANS_REVERSE : UTRANS_FORWARD;
2334        UnicodeString source;
2335        UnicodeString exp;
2336        if (DATA[i+2] != NULL) {
2337            source = CharsToUnicodeString(DATA[i+2]);
2338            exp = CharsToUnicodeString(DATA[i+3]);
2339        }
2340        UBool expOk = (DATA[i+1] != NULL);
2341        Transliterator* t = NULL;
2342        UParseError pe;
2343        UErrorCode ec = U_ZERO_ERROR;
2344        if (id.charAt(0) == 0x23/*#*/) {
2345            t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2346        } else {
2347            t = Transliterator::createInstance(id, direction, pe, ec);
2348        }
2349        UBool ok = (t != NULL && U_SUCCESS(ec));
2350        UnicodeString transID;
2351        if (t!=0) {
2352            transID = t->getID();
2353        }
2354        else {
2355            transID = UnicodeString("NULL", "");
2356        }
2357        if (ok == expOk) {
2358            logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2359                  u_errorName(ec));
2360            if (source.length() != 0) {
2361                expect(*t, source, exp);
2362            }
2363            delete t;
2364        } else {
2365            dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2366                  u_errorName(ec));
2367        }
2368    }
2369}
2370
2371/**
2372 * Test new property set syntax
2373 */
2374void TransliteratorTest::TestPropertySet() {
2375    expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2376    expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2377           "[ a stitch ]\n[ in time ]\r[ saves 9]");
2378}
2379
2380/**
2381 * Test various failure points of the new 2.0 engine.
2382 */
2383void TransliteratorTest::TestNewEngine() {
2384    UParseError pe;
2385    UErrorCode ec = U_ZERO_ERROR;
2386    Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2387    if (t == 0 || U_FAILURE(ec)) {
2388        dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2389        return;
2390    }
2391    // Katakana should be untouched
2392    expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2393           CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2394
2395    delete t;
2396
2397#if 1
2398    // This test will only work if Transliterator.ROLLBACK is
2399    // true.  Otherwise, this test will fail, revealing a
2400    // limitation of global filters in incremental mode.
2401    Transliterator *a =
2402        Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2403    Transliterator *A =
2404        Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2405    if (U_FAILURE(ec)) {
2406        delete a;
2407        delete A;
2408        return;
2409    }
2410
2411    Transliterator* array[3];
2412    array[0] = a;
2413    array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2414    array[2] = A;
2415    if (U_FAILURE(ec)) {
2416        errln("FAIL: createInstance NFD");
2417        delete a;
2418        delete A;
2419        delete array[1];
2420        return;
2421    }
2422
2423    t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2424    if (U_FAILURE(ec)) {
2425        errln("FAIL: UnicodeSet constructor");
2426        delete a;
2427        delete A;
2428        delete array[1];
2429        delete t;
2430        return;
2431    }
2432
2433    expect(*t, "aAaA", "bAbA");
2434
2435    assertTrue("countElements", t->countElements() == 3);
2436    assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2437    assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2438    assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2439    assertSuccess("getElement", ec);
2440
2441    delete a;
2442    delete A;
2443    delete array[1];
2444    delete t;
2445#endif
2446
2447    expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2448           "a",
2449           "ax");
2450
2451    UnicodeString gr = CharsToUnicodeString(
2452        "$ddot = \\u0308 ;"
2453        "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2454        "$rough = \\u0314 ;"
2455        "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2456        "\\u03b1 <> a ;"
2457        "$rough <> h ;");
2458
2459    expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2460}
2461
2462/**
2463 * Test quantified segment behavior.  We want:
2464 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2465 */
2466void TransliteratorTest::TestQuantifiedSegment(void) {
2467    // The normal case
2468    expect("([abc]+) > x $1 x;", "cba", "xcbax");
2469
2470    // The tricky case; the quantifier is around the segment
2471    expect("([abc])+ > x $1 x;", "cba", "xax");
2472
2473    // Tricky case in reverse direction
2474    expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2475
2476    // Check post-context segment
2477    expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2478
2479    // Test toRule/toPattern for non-quantified segment.
2480    // Careful with spacing here.
2481    UnicodeString r("([a-c]){q} > x $1 x;");
2482    UParseError pe;
2483    UErrorCode ec = U_ZERO_ERROR;
2484    Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2485    if (U_FAILURE(ec)) {
2486        errln("FAIL: createFromRules");
2487        delete t;
2488        return;
2489    }
2490    UnicodeString rr;
2491    t->toRules(rr, TRUE);
2492    if (r != rr) {
2493        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2494    } else {
2495        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496    }
2497    delete t;
2498
2499    // Test toRule/toPattern for quantified segment.
2500    // Careful with spacing here.
2501    r = "([a-c])+{q} > x $1 x;";
2502    t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2503    if (U_FAILURE(ec)) {
2504        errln("FAIL: createFromRules");
2505        delete t;
2506        return;
2507    }
2508    t->toRules(rr, TRUE);
2509    if (r != rr) {
2510        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2511    } else {
2512        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513    }
2514    delete t;
2515}
2516
2517//======================================================================
2518// Ram's tests
2519//======================================================================
2520void TransliteratorTest::TestDevanagariLatinRT(){
2521    const int MAX_LEN= 52;
2522    const char* const source[MAX_LEN] = {
2523        "bh\\u0101rata",
2524        "kra",
2525        "k\\u1E63a",
2526        "khra",
2527        "gra",
2528        "\\u1E45ra",
2529        "cra",
2530        "chra",
2531        "j\\u00F1a",
2532        "jhra",
2533        "\\u00F1ra",
2534        "\\u1E6Dya",
2535        "\\u1E6Dhra",
2536        "\\u1E0Dya",
2537      //"r\\u0323ya", // \u095c is not valid in Devanagari
2538        "\\u1E0Dhya",
2539        "\\u1E5Bhra",
2540        "\\u1E47ra",
2541        "tta",
2542        "thra",
2543        "dda",
2544        "dhra",
2545        "nna",
2546        "pra",
2547        "phra",
2548        "bra",
2549        "bhra",
2550        "mra",
2551        "\\u1E49ra",
2552      //"l\\u0331ra",
2553        "yra",
2554        "\\u1E8Fra",
2555      //"l-",
2556        "vra",
2557        "\\u015Bra",
2558        "\\u1E63ra",
2559        "sra",
2560        "hma",
2561        "\\u1E6D\\u1E6Da",
2562        "\\u1E6D\\u1E6Dha",
2563        "\\u1E6Dh\\u1E6Dha",
2564        "\\u1E0D\\u1E0Da",
2565        "\\u1E0D\\u1E0Dha",
2566        "\\u1E6Dya",
2567        "\\u1E6Dhya",
2568        "\\u1E0Dya",
2569        "\\u1E0Dhya",
2570        // Not roundtrippable --
2571        // \\u0939\\u094d\\u094d\\u092E  - hma
2572        // \\u0939\\u094d\\u092E         - hma
2573        // CharsToUnicodeString("hma"),
2574        "hya",
2575        "\\u015Br\\u0325",
2576        "\\u015Bca",
2577        "\\u0115",
2578        "san\\u0304j\\u012Bb s\\u0113nagupta",
2579        "\\u0101nand vaddir\\u0101ju",
2580        "\\u0101",
2581        "a"
2582    };
2583    const char* const expected[MAX_LEN] = {
2584        "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2585        "\\u0915\\u094D\\u0930",          /* kra         */
2586        "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2587        "\\u0916\\u094D\\u0930",          /* khra        */
2588        "\\u0917\\u094D\\u0930",          /* gra         */
2589        "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2590        "\\u091A\\u094D\\u0930",          /* cra         */
2591        "\\u091B\\u094D\\u0930",          /* chra        */
2592        "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2593        "\\u091D\\u094D\\u0930",          /* jhra        */
2594        "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2595        "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2596        "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2597        "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2598      //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2599        "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2600        "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2601        "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2602        "\\u0924\\u094D\\u0924",          /* tta         */
2603        "\\u0925\\u094D\\u0930",          /* thra        */
2604        "\\u0926\\u094D\\u0926",          /* dda         */
2605        "\\u0927\\u094D\\u0930",          /* dhra        */
2606        "\\u0928\\u094D\\u0928",          /* nna         */
2607        "\\u092A\\u094D\\u0930",          /* pra         */
2608        "\\u092B\\u094D\\u0930",          /* phra        */
2609        "\\u092C\\u094D\\u0930",          /* bra         */
2610        "\\u092D\\u094D\\u0930",          /* bhra        */
2611        "\\u092E\\u094D\\u0930",          /* mra         */
2612        "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2613      //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2614        "\\u092F\\u094D\\u0930",          /* yra         */
2615        "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2616      //"l-",
2617        "\\u0935\\u094D\\u0930",          /* vra         */
2618        "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2619        "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2620        "\\u0938\\u094D\\u0930",          /* sra         */
2621        "\\u0939\\u094d\\u092E",          /* hma         */
2622        "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2623        "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2624        "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2625        "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2626        "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2627        "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2628        "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2629        "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2630        "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2631     // "hma",                         /* hma         */
2632        "\\u0939\\u094D\\u092F",          /* hya         */
2633        "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2634        "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2635        "\\u090d",                        /* e\\u0306    */
2636        "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2637        "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2638        "\\u0906",
2639        "\\u0905",
2640    };
2641    UErrorCode status = U_ZERO_ERROR;
2642    UParseError parseError;
2643    UnicodeString message;
2644    Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2645    Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2646    if(U_FAILURE(status)){
2647        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2648        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2649        return;
2650    }
2651    UnicodeString gotResult;
2652    for(int i= 0; i<MAX_LEN; i++){
2653        gotResult = source[i];
2654        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2655        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2656    }
2657    delete latinToDev;
2658    delete devToLatin;
2659}
2660
2661void TransliteratorTest::TestTeluguLatinRT(){
2662    const int MAX_LEN=10;
2663    const char* const source[MAX_LEN] = {
2664        "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2665        "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2666        "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2667        "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2668        "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2669        "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2670        "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2671        "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2672        "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2673        "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2674    };
2675
2676    const char* const expected[MAX_LEN] = {
2677        "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678        "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2679        "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680        "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681        "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2682        "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2683        "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2684        "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2685        "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686        "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2687    };
2688
2689    UErrorCode status = U_ZERO_ERROR;
2690    UParseError parseError;
2691    UnicodeString message;
2692    Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2693    Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2694    if(U_FAILURE(status)){
2695        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2696        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2697        return;
2698    }
2699    UnicodeString gotResult;
2700    for(int i= 0; i<MAX_LEN; i++){
2701        gotResult = source[i];
2702        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2703        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2704    }
2705    delete latinToDev;
2706    delete devToLatin;
2707}
2708
2709void TransliteratorTest::TestSanskritLatinRT(){
2710    const int MAX_LEN =16;
2711    const char* const source[MAX_LEN] = {
2712        "rmk\\u1E63\\u0113t",
2713        "\\u015Br\\u012Bmad",
2714        "bhagavadg\\u012Bt\\u0101",
2715        "adhy\\u0101ya",
2716        "arjuna",
2717        "vi\\u1E63\\u0101da",
2718        "y\\u014Dga",
2719        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2720        "uv\\u0101cr\\u0325",
2721        "dharmak\\u1E63\\u0113tr\\u0113",
2722        "kuruk\\u1E63\\u0113tr\\u0113",
2723        "samav\\u0113t\\u0101",
2724        "yuyutsava\\u1E25",
2725        "m\\u0101mak\\u0101\\u1E25",
2726    // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2727        "kimakurvata",
2728        "san\\u0304java",
2729    };
2730    const char* const expected[MAX_LEN] = {
2731        "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2732        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2733        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2734        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2735        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2736        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2737        "\\u092f\\u094b\\u0917",
2738        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2739        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2740        "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2743        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2744        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2745    //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2746        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2747        "\\u0938\\u0902\\u091c\\u0935",
2748    };
2749    UErrorCode status = U_ZERO_ERROR;
2750    UParseError parseError;
2751    UnicodeString message;
2752    Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2753    Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2754    if(U_FAILURE(status)){
2755        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2756        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2757        return;
2758    }
2759    UnicodeString gotResult;
2760    for(int i= 0; i<MAX_LEN; i++){
2761        gotResult = source[i];
2762        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2763        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2764    }
2765    delete latinToDev;
2766    delete devToLatin;
2767}
2768
2769
2770void TransliteratorTest::TestCompoundLatinRT(){
2771    const char* const source[] = {
2772        "rmk\\u1E63\\u0113t",
2773        "\\u015Br\\u012Bmad",
2774        "bhagavadg\\u012Bt\\u0101",
2775        "adhy\\u0101ya",
2776        "arjuna",
2777        "vi\\u1E63\\u0101da",
2778        "y\\u014Dga",
2779        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2780        "uv\\u0101cr\\u0325",
2781        "dharmak\\u1E63\\u0113tr\\u0113",
2782        "kuruk\\u1E63\\u0113tr\\u0113",
2783        "samav\\u0113t\\u0101",
2784        "yuyutsava\\u1E25",
2785        "m\\u0101mak\\u0101\\u1E25",
2786     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2787        "kimakurvata",
2788        "san\\u0304java"
2789    };
2790    const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2791    const char* const expected[MAX_LEN] = {
2792        "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2793        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2794        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2795        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2796        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2797        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2798        "\\u092f\\u094b\\u0917",
2799        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2800        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2801        "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2804        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2805        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2806    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2807        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2808        "\\u0938\\u0902\\u091c\\u0935"
2809    };
2810    if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2811        errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2812        return;
2813    }
2814
2815    UErrorCode status = U_ZERO_ERROR;
2816    UParseError parseError;
2817    UnicodeString message;
2818    Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2819    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2820    Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2821    Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2822
2823    if(U_FAILURE(status)){
2824        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2825        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2826        return;
2827    }
2828    UnicodeString gotResult;
2829    for(int i= 0; i<MAX_LEN; i++){
2830        gotResult = source[i];
2831        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2832        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2833        expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2834
2835    }
2836    delete(latinToDevToLatin);
2837    delete(devToLatinToDev);
2838    delete(devToTelToDev);
2839    delete(latinToTelToLatin);
2840}
2841
2842/**
2843 * Test Gurmukhi-Devanagari Tippi and Bindi
2844 */
2845void TransliteratorTest::TestGurmukhiDevanagari(){
2846    // the rule says:
2847    // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2848    // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2849    UErrorCode status = U_ZERO_ERROR;
2850    UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2851    UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2852    UParseError parseError;
2853
2854    UnicodeSetIterator vIter(vowel);
2855    UnicodeSetIterator nvIter(non_vowel);
2856    Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2857    if(U_FAILURE(status)) {
2858      dataerrln("Error creating transliterator %s", u_errorName(status));
2859      delete trans;
2860      return;
2861    }
2862    UnicodeString src (" \\u0902", -1, US_INV);
2863    UnicodeString expected(" \\u0A02", -1, US_INV);
2864    src = src.unescape();
2865    expected= expected.unescape();
2866
2867    while(vIter.next()){
2868        src.setCharAt(0,(UChar) vIter.getCodepoint());
2869        expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2870        expect(*trans,src,expected);
2871    }
2872
2873    expected.setCharAt(1,0x0A70);
2874    while(nvIter.next()){
2875        //src.setCharAt(0,(char) nvIter.codepoint);
2876        src.setCharAt(0,(UChar)nvIter.getCodepoint());
2877        expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2878        expect(*trans,src,expected);
2879    }
2880    delete trans;
2881}
2882/**
2883 * Test instantiation from a locale.
2884 */
2885void TransliteratorTest::TestLocaleInstantiation(void) {
2886    UParseError pe;
2887    UErrorCode ec = U_ZERO_ERROR;
2888    Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2889    if (U_FAILURE(ec)) {
2890        dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2891        delete t;
2892        return;
2893    }
2894    expect(*t, CharsToUnicodeString("\\u0430"), "a");
2895    delete t;
2896
2897    t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2898    if (U_FAILURE(ec)) {
2899        errln("FAIL: createInstance(en-el)");
2900        delete t;
2901        return;
2902    }
2903    expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2904    delete t;
2905}
2906
2907/**
2908 * Test title case handling of accent (should ignore accents)
2909 */
2910void TransliteratorTest::TestTitleAccents(void) {
2911    UParseError pe;
2912    UErrorCode ec = U_ZERO_ERROR;
2913    Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2914    if (U_FAILURE(ec)) {
2915        errln("FAIL: createInstance(Title)");
2916        delete t;
2917        return;
2918    }
2919    expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2920    delete t;
2921}
2922
2923/**
2924 * Basic test of a locale resource based rule.
2925 */
2926void TransliteratorTest::TestLocaleResource() {
2927    const char* DATA[] = {
2928        // id                    from               to
2929        //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2930        "Latin-el",              "b",               "\\u03bc\\u03c0",
2931        "Latin-Greek",           "b",               "\\u03B2",
2932        "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2933        "el-Latin",              "\\u03B2",         "v",
2934        "Greek-Latin",           "\\u03B2",         "b",
2935    };
2936    const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2937    for (int32_t i=0; i<DATA_length; i+=3) {
2938        UParseError pe;
2939        UErrorCode ec = U_ZERO_ERROR;
2940        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2941        if (U_FAILURE(ec)) {
2942            dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2943            delete t;
2944            continue;
2945        }
2946        expect(*t, CharsToUnicodeString(DATA[i+1]),
2947               CharsToUnicodeString(DATA[i+2]));
2948        delete t;
2949    }
2950}
2951
2952/**
2953 * Make sure parse errors reference the right line.
2954 */
2955void TransliteratorTest::TestParseError() {
2956    static const char* rule =
2957        "a > b;\n"
2958        "# more stuff\n"
2959        "d << b;";
2960    UErrorCode ec = U_ZERO_ERROR;
2961    UParseError pe;
2962    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2963    delete t;
2964    if (U_FAILURE(ec)) {
2965        UnicodeString err(pe.preContext);
2966        err.append((UChar)124/*|*/).append(pe.postContext);
2967        if (err.indexOf("d << b") >= 0) {
2968            logln("Ok: " + err);
2969        } else {
2970            errln("FAIL: " + err);
2971        }
2972    }
2973    else {
2974        errln("FAIL: no syntax error");
2975    }
2976    static const char* maskingRule =
2977        "a>x;\n"
2978        "# more stuff\n"
2979        "ab>y;";
2980    ec = U_ZERO_ERROR;
2981    delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2982    if (ec != U_RULE_MASK_ERROR) {
2983        errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2984    }
2985    else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2986        errln("FAIL: did not get expected precontext");
2987    }
2988    else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2989        errln("FAIL: did not get expected postcontext");
2990    }
2991}
2992
2993/**
2994 * Make sure sets on output are disallowed.
2995 */
2996void TransliteratorTest::TestOutputSet() {
2997    UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2998    UErrorCode ec = U_ZERO_ERROR;
2999    UParseError pe;
3000    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3001    delete t;
3002    if (U_FAILURE(ec)) {
3003        UnicodeString err(pe.preContext);
3004        err.append((UChar)124/*|*/).append(pe.postContext);
3005        logln("Ok: " + err);
3006        return;
3007    }
3008    errln("FAIL: No syntax error");
3009}
3010
3011/**
3012 * Test the use variable range pragma, making sure that use of
3013 * variable range characters is detected and flagged as an error.
3014 */
3015void TransliteratorTest::TestVariableRange() {
3016    UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3017    UErrorCode ec = U_ZERO_ERROR;
3018    UParseError pe;
3019    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3020    delete t;
3021    if (U_FAILURE(ec)) {
3022        UnicodeString err(pe.preContext);
3023        err.append((UChar)124/*|*/).append(pe.postContext);
3024        logln("Ok: " + err);
3025        return;
3026    }
3027    errln("FAIL: No syntax error");
3028}
3029
3030/**
3031 * Test invalid post context error handling
3032 */
3033void TransliteratorTest::TestInvalidPostContext() {
3034    UnicodeString rule = "a}b{c>d;";
3035    UErrorCode ec = U_ZERO_ERROR;
3036    UParseError pe;
3037    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3038    delete t;
3039    if (U_FAILURE(ec)) {
3040        UnicodeString err(pe.preContext);
3041        err.append((UChar)124/*|*/).append(pe.postContext);
3042        if (err.indexOf("a}b{c") >= 0) {
3043            logln("Ok: " + err);
3044        } else {
3045            errln("FAIL: " + err);
3046        }
3047        return;
3048    }
3049    errln("FAIL: No syntax error");
3050}
3051
3052/**
3053 * Test ID form variants
3054 */
3055void TransliteratorTest::TestIDForms() {
3056    const char* DATA[] = {
3057        "NFC", NULL, "NFD",
3058        "nfd", NULL, "NFC", // make sure case is ignored
3059        "Any-NFKD", NULL, "Any-NFKC",
3060        "Null", NULL, "Null",
3061        "-nfkc", "nfkc", "NFKD",
3062        "-nfkc/", "nfkc", "NFKD",
3063        "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3064        "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3065        "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3066        "Source-", NULL, NULL,
3067        "Source/Variant-", NULL, NULL,
3068        "Source-/Variant", NULL, NULL,
3069        "/Variant", NULL, NULL,
3070        "/Variant-", NULL, NULL,
3071        "-/Variant", NULL, NULL,
3072        "-/", NULL, NULL,
3073        "-", NULL, NULL,
3074        "/", NULL, NULL,
3075    };
3076    const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3077
3078    for (int32_t i=0; i<DATA_length; i+=3) {
3079        const char* ID = DATA[i];
3080        const char* expID = DATA[i+1];
3081        const char* expInvID = DATA[i+2];
3082        UBool expValid = (expInvID != NULL);
3083        if (expID == NULL) {
3084            expID = ID;
3085        }
3086        UParseError pe;
3087        UErrorCode ec = U_ZERO_ERROR;
3088        Transliterator *t =
3089            Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3090        if (U_FAILURE(ec)) {
3091            if (!expValid) {
3092                logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3093            } else {
3094                dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3095            }
3096            delete t;
3097            continue;
3098        }
3099        Transliterator *u = t->createInverse(ec);
3100        if (U_FAILURE(ec)) {
3101            errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3102            delete t;
3103            delete u;
3104            continue;
3105        }
3106        if (t->getID() == expID &&
3107            u->getID() == expInvID) {
3108            logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3109        } else {
3110            errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3111                  t->getID() + " x getInverse() => " + u->getID() +
3112                  ", expected " + expInvID);
3113        }
3114        delete t;
3115        delete u;
3116    }
3117}
3118
3119static const UChar SPACE[]   = {32,0};
3120static const UChar NEWLINE[] = {10,0};
3121static const UChar RETURN[]  = {13,0};
3122static const UChar EMPTY[]   = {0};
3123
3124void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3125                                    const UnicodeString& testRulesForward) {
3126    UnicodeString rules2; t2.toRules(rules2, TRUE);
3127    //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3128    rules2.findAndReplace(SPACE, EMPTY);
3129    rules2.findAndReplace(NEWLINE, EMPTY);
3130    rules2.findAndReplace(RETURN, EMPTY);
3131
3132    UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3133
3134    if (rules2 != testRules) {
3135        errln(label);
3136        logln((UnicodeString)"GENERATED RULES: " + rules2);
3137        logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3138    }
3139}
3140
3141/**
3142 * Mark's toRules test.
3143 */
3144void TransliteratorTest::TestToRulesMark() {
3145    const char* testRules =
3146        "::[[:Latin:][:Mark:]];"
3147        "::NFKD (NFC);"
3148        "::Lower (Lower);"
3149        "a <> \\u03B1;" // alpha
3150        "::NFKC (NFD);"
3151        "::Upper (Lower);"
3152        "::Lower ();"
3153        "::([[:Greek:][:Mark:]]);"
3154        ;
3155    const char* testRulesForward =
3156        "::[[:Latin:][:Mark:]];"
3157        "::NFKD(NFC);"
3158        "::Lower(Lower);"
3159        "a > \\u03B1;"
3160        "::NFKC(NFD);"
3161        "::Upper (Lower);"
3162        "::Lower ();"
3163        ;
3164    const char* testRulesBackward =
3165        "::[[:Greek:][:Mark:]];"
3166        "::Lower (Upper);"
3167        "::NFD(NFKC);"
3168        "\\u03B1 > a;"
3169        "::Lower(Lower);"
3170        "::NFC(NFKD);"
3171        ;
3172    UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3173    UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3174
3175    UParseError pe;
3176    UErrorCode ec = U_ZERO_ERROR;
3177    Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3178    Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3179
3180    if (U_FAILURE(ec)) {
3181        delete t2;
3182        delete t3;
3183        dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3184        return;
3185    }
3186
3187    expect(*t2, source, target);
3188    expect(*t3, target, source);
3189
3190    checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3191    checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3192
3193    delete t2;
3194    delete t3;
3195}
3196
3197/**
3198 * Test Escape and Unescape transliterators.
3199 */
3200void TransliteratorTest::TestEscape() {
3201    UParseError pe;
3202    UErrorCode ec;
3203    Transliterator *t;
3204
3205    ec = U_ZERO_ERROR;
3206    t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3207    if (U_FAILURE(ec)) {
3208        errln((UnicodeString)"FAIL: createInstance");
3209    } else {
3210        expect(*t,
3211               UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3212               "@12Q");
3213    }
3214    delete t;
3215
3216    ec = U_ZERO_ERROR;
3217    t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3218    if (U_FAILURE(ec)) {
3219        errln((UnicodeString)"FAIL: createInstance");
3220    } else {
3221        expect(*t,
3222               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3223               UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3224    }
3225    delete t;
3226
3227    ec = U_ZERO_ERROR;
3228    t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3229    if (U_FAILURE(ec)) {
3230        errln((UnicodeString)"FAIL: createInstance");
3231    } else {
3232        expect(*t,
3233               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3234               UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3235    }
3236    delete t;
3237
3238    ec = U_ZERO_ERROR;
3239    t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3240    if (U_FAILURE(ec)) {
3241        errln((UnicodeString)"FAIL: createInstance");
3242    } else {
3243        expect(*t,
3244               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3245               UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3246    }
3247    delete t;
3248}
3249
3250
3251void TransliteratorTest::TestAnchorMasking(){
3252    UnicodeString rule ("^a > Q; a > q;");
3253    UErrorCode status= U_ZERO_ERROR;
3254    UParseError parseError;
3255
3256    Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3257    if(U_FAILURE(status)){
3258        errln(UnicodeString("FAIL: ") + "ID" +
3259              ".createFromRules() => bad rules" +
3260              /*", parse error " + parseError.code +*/
3261              ", line " + parseError.line +
3262              ", offset " + parseError.offset +
3263              ", context " + prettify(parseError.preContext, TRUE) +
3264              ", rules: " + prettify(rule, TRUE));
3265    }
3266    delete t;
3267}
3268
3269/**
3270 * Make sure display names of variants look reasonable.
3271 */
3272void TransliteratorTest::TestDisplayName() {
3273#if UCONFIG_NO_FORMATTING
3274    logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3275    return;
3276#else
3277    static const char* DATA[] = {
3278        // ID, forward name, reverse name
3279        // Update the text as necessary -- the important thing is
3280        // not the text itself, but how various cases are handled.
3281
3282        // Basic test
3283        "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3284
3285        // Variants
3286        "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3287
3288        // Target-only IDs
3289        "NFC", "Any to NFC", "Any to NFD",
3290    };
3291
3292    int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3293
3294    Locale US("en", "US");
3295
3296    for (int32_t i=0; i<DATA_length; i+=3) {
3297        UnicodeString name;
3298        Transliterator::getDisplayName(DATA[i], US, name);
3299        if (name != DATA[i+1]) {
3300            dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3301                  name + ", expected " + DATA[i+1]);
3302        } else {
3303            logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3304        }
3305        UErrorCode ec = U_ZERO_ERROR;
3306        UParseError pe;
3307        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3308        if (U_FAILURE(ec)) {
3309            delete t;
3310            dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3311            continue;
3312        }
3313        name = Transliterator::getDisplayName(t->getID(), US, name);
3314        if (name != DATA[i+2]) {
3315            dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3316                  name + ", expected " + DATA[i+2]);
3317        } else {
3318            logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3319        }
3320        delete t;
3321    }
3322#endif
3323}
3324
3325void TransliteratorTest::TestSpecialCases(void) {
3326    const UnicodeString registerRules[] = {
3327        "Any-Dev1", "x > X; y > Y;",
3328        "Any-Dev2", "XY > Z",
3329        "Greek-Latin/FAKE",
3330            CharsToUnicodeString
3331            ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3332        "" // END MARKER
3333    };
3334
3335    const UnicodeString testCases[] = {
3336        // NORMALIZATION
3337        // should add more test cases
3338        "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339        "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340        "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341        "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342
3343        // mp -> b BUG
3344        "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345        "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346
3347        // check for devanagari bug
3348        "nfd;Dev1;Dev2;nfc", "xy", "Z",
3349
3350        // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3351        "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3352                 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3353
3354        //TODO: enable this test once Titlecase works right
3355        /*
3356        "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3357                 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3358                 */
3359        "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3360                 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3361        "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362                 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3363
3364        "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365        "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3366
3367         // FORMS OF S
3368        "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369                               CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3370        "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3371                               CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3372        "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3373                        CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3374        "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3375                        CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3376        // Tatiana bug
3377        // Upper: TAT\\u02B9\\u00C2NA
3378        // Lower: tat\\u02B9\\u00E2na
3379        // Title: Tat\\u02B9\\u00E2na
3380        "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3381                 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382        "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3383                 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384        "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3385                 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3386
3387        "" // END MARKER
3388    };
3389
3390    UParseError pos;
3391    int32_t i;
3392    for (i = 0; registerRules[i].length()!=0; i+=2) {
3393        UErrorCode status = U_ZERO_ERROR;
3394
3395        Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3396            registerRules[i+1], UTRANS_FORWARD, pos, status);
3397        if (U_FAILURE(status)) {
3398            dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3399        } else {
3400            Transliterator::registerInstance(t);
3401        }
3402    }
3403    for (i = 0; testCases[i].length()!=0; i+=3) {
3404        UErrorCode ec = U_ZERO_ERROR;
3405        UParseError pe;
3406        const UnicodeString& name = testCases[i];
3407        Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3408        if (U_FAILURE(ec)) {
3409            dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3410            delete t;
3411            continue;
3412        }
3413        const UnicodeString& id = t->getID();
3414        const UnicodeString& source = testCases[i+1];
3415        UnicodeString target;
3416
3417        // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3418
3419        if (testCases[i+2].length() > 0) {
3420            target = testCases[i+2];
3421        } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3422            Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3423        } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3424            Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3425        } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3426            Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3427        } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3428            Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3429        } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3430            target = source;
3431            target.toLower(Locale::getUS());
3432        } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3433            target = source;
3434            target.toUpper(Locale::getUS());
3435        }
3436        if (U_FAILURE(ec)) {
3437            errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3438            continue;
3439        }
3440
3441        expect(*t, source, target);
3442        delete t;
3443    }
3444    for (i = 0; registerRules[i].length()!=0; i+=2) {
3445        Transliterator::unregister(registerRules[i]);
3446    }
3447}
3448
/**
 * Format a code point as an escape sequence: "\uhhhh" (4 lowercase hex
 * digits) when ch <= 0xFFFF, otherwise "\Uhhhhhhhh" (8 lowercase hex digits).
 *
 * @param ch     the code point to format
 * @param buffer caller-supplied output buffer; the function does no bounds
 *               checking, so it must be large enough for the escape plus
 *               the terminating NUL
 * @return buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    if (ch > 0xFFFF) {
        // Needs the long form.
        sprintf(buffer, "\\U%08x", (int)ch);
        return buffer;
    }
    sprintf(buffer, "\\u%04x", (int)ch);
    return buffer;
}
3457
3458void TransliteratorTest::TestSurrogateCasing (void) {
3459    // check that casing handles surrogates
3460    // titlecase is currently defective
3461    char buffer[20];
3462    UChar buffer2[20];
3463    UChar32 dee;
3464    U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3465    UnicodeString DEE(u_totitle(dee));
3466    if (DEE != DESERET_DEE) {
3467        err("Fails titlecase of surrogates");
3468        err(Char32ToEscapedChars(dee, buffer));
3469        err(", ");
3470        errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3471    }
3472
3473    UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3474    UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3475    UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3476    UErrorCode status= U_ZERO_ERROR;
3477
3478    u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3479    if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3480        errln("Fails: Can't uppercase surrogates.");
3481    }
3482
3483    status= U_ZERO_ERROR;
3484    u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3485    if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3486        errln("Fails: Can't lowercase surrogates.");
3487    }
3488}
3489
3490static void _trans(Transliterator& t, const UnicodeString& src,
3491                   UnicodeString& result) {
3492    result = src;
3493    t.transliterate(result);
3494}
3495
3496static void _trans(const UnicodeString& id, const UnicodeString& src,
3497                   UnicodeString& result, UErrorCode ec) {
3498    UParseError pe;
3499    Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3500    if (U_SUCCESS(ec)) {
3501        _trans(*t, src, result);
3502    }
3503    delete t;
3504}
3505
3506static UnicodeString _findMatch(const UnicodeString& source,
3507                                       const UnicodeString* pairs) {
3508    UnicodeString empty;
3509    for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3510        if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3511            return pairs[i+1];
3512        }
3513    }
3514    return empty;
3515}
3516
3517// Check to see that incremental gets at least part way through a reasonable string.
3518
3519void TransliteratorTest::TestIncrementalProgress(void) {
3520    UErrorCode ec = U_ZERO_ERROR;
3521    UnicodeString latinTest = "The Quick Brown Fox.";
3522    UnicodeString devaTest;
3523    _trans("Latin-Devanagari", latinTest, devaTest, ec);
3524    UnicodeString kataTest;
3525    _trans("Latin-Katakana", latinTest, kataTest, ec);
3526    if (U_FAILURE(ec)) {
3527        errln("FAIL: Internal error");
3528        return;
3529    }
3530    const UnicodeString tests[] = {
3531        "Any", latinTest,
3532        "Latin", latinTest,
3533        "Halfwidth", latinTest,
3534        "Devanagari", devaTest,
3535        "Katakana", kataTest,
3536        "" // END MARKER
3537    };
3538
3539    UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3540    int32_t i = 0, j=0, k=0;
3541    int32_t sources = Transliterator::countAvailableSources();
3542    for (i = 0; i < sources; i++) {
3543        UnicodeString source;
3544        Transliterator::getAvailableSource(i, source);
3545        UnicodeString test = _findMatch(source, tests);
3546        if (test.length() == 0) {
3547            logln((UnicodeString)"Skipping " + source + "-X");
3548            continue;
3549        }
3550        int32_t targets = Transliterator::countAvailableTargets(source);
3551        for (j = 0; j < targets; j++) {
3552            UnicodeString target;
3553            Transliterator::getAvailableTarget(j, source, target);
3554            int32_t variants = Transliterator::countAvailableVariants(source, target);
3555            for (k =0; k< variants; k++) {
3556                UnicodeString variant;
3557                UParseError err;
3558                UErrorCode status = U_ZERO_ERROR;
3559
3560                Transliterator::getAvailableVariant(k, source, target, variant);
3561                UnicodeString id = source + "-" + target + "/" + variant;
3562
3563                Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3564                if (U_FAILURE(status)) {
3565                    dataerrln((UnicodeString)"FAIL: Could not create " + id);
3566                    delete t;
3567                    continue;
3568                }
3569                status = U_ZERO_ERROR;
3570                CheckIncrementalAux(t, test);
3571
3572                UnicodeString rev;
3573                _trans(*t, test, rev);
3574                Transliterator *inv = t->createInverse(status);
3575                if (U_FAILURE(status)) {
3576#if UCONFIG_NO_BREAK_ITERATION
3577                    // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3578                    if (id.compare((UnicodeString)"Latin-Thai/") != 0)
3579#endif
3580                        errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3581
3582                    delete t;
3583                    delete inv;
3584                    continue;
3585                }
3586                CheckIncrementalAux(inv, rev);
3587                delete t;
3588                delete inv;
3589            }
3590        }
3591    }
3592}
3593
3594void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3595                                                      const UnicodeString& input) {
3596    UErrorCode ec = U_ZERO_ERROR;
3597    UTransPosition pos;
3598    UnicodeString test = input;
3599
3600    pos.contextStart = 0;
3601    pos.contextLimit = input.length();
3602    pos.start = 0;
3603    pos.limit = input.length();
3604
3605    t->transliterate(test, pos, ec);
3606    if (U_FAILURE(ec)) {
3607        errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3608        return;
3609    }
3610    UBool gotError = FALSE;
3611    (void)gotError;    // Suppress set but not used warning.
3612
3613    // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3614
3615    if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3616        errln((UnicodeString)"No Progress, " +
3617              t->getID() + ": " + formatInput(test, input, pos));
3618        gotError = TRUE;
3619    } else {
3620        logln((UnicodeString)"PASS Progress, " +
3621              t->getID() + ": " + formatInput(test, input, pos));
3622    }
3623    t->finishTransliteration(test, pos);
3624    if (pos.start != pos.limit) {
3625        errln((UnicodeString)"Incomplete, " +
3626              t->getID() + ": " + formatInput(test, input, pos));
3627        gotError = TRUE;
3628    }
3629}
3630
3631void TransliteratorTest::TestFunction() {
3632    // Careful with spacing and ';' here:  Phrase this exactly
3633    // as toRules() is going to return it.  If toRules() changes
3634    // with regard to spacing or ';', then adjust this string.
3635    UnicodeString rule =
3636        "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3637
3638    UParseError pe;
3639    UErrorCode ec = U_ZERO_ERROR;
3640    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3641    if (t == NULL) {
3642        dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3643        return;
3644    }
3645
3646    UnicodeString r;
3647    t->toRules(r, TRUE);
3648    if (r == rule) {
3649        logln((UnicodeString)"OK: toRules() => " + r);
3650    } else {
3651        errln((UnicodeString)"FAIL: toRules() => " + r +
3652              ", expected " + rule);
3653    }
3654
3655    expect(*t, "The Quick Brown Fox",
3656           UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3657
3658    delete t;
3659}
3660
3661void TransliteratorTest::TestInvalidBackRef(void) {
3662    UnicodeString rule =  ". > $1;";
3663    UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3664    UParseError pe;
3665    UErrorCode ec = U_ZERO_ERROR;
3666    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3667    Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3668
3669    if (t != NULL) {
3670        errln("FAIL: createFromRules should have returned NULL");
3671        delete t;
3672    }
3673
3674    if (t2 != NULL) {
3675        errln("FAIL: createFromRules should have returned NULL");
3676        delete t2;
3677    }
3678
3679    if (U_SUCCESS(ec)) {
3680        errln("FAIL: Ok: . > $1; => no error");
3681    } else {
3682        logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3683    }
3684}
3685
3686void TransliteratorTest::TestMulticharStringSet() {
3687    // Basic testing
3688    const char* rule =
3689        "       [{aa}]       > x;"
3690        "         a          > y;"
3691        "       [b{bc}]      > z;"
3692        "[{gd}] { e          > q;"
3693        "         e } [{fg}] > r;" ;
3694
3695    UParseError pe;
3696    UErrorCode ec = U_ZERO_ERROR;
3697    Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3698    if (t == NULL || U_FAILURE(ec)) {
3699        delete t;
3700        errln("FAIL: createFromRules failed");
3701        return;
3702    }
3703
3704    expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3705           "y x yz z d gd de gdq gdqfg ddrfg");
3706    delete t;
3707
3708    // Overlapped string test.  Make sure that when multiple
3709    // strings can match that the longest one is matched.
3710    rule =
3711        "    [a {ab} {abc}]    > x;"
3712        "           b          > y;"
3713        "           c          > z;"
3714        " q [t {st} {rst}] { e > p;" ;
3715
3716    t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3717    if (t == NULL || U_FAILURE(ec)) {
3718        delete t;
3719        errln("FAIL: createFromRules failed");
3720        return;
3721    }
3722
3723    expect(*t, "a ab abc qte qste qrste",
3724           "x x x qtp qstp qrstp");
3725    delete t;
3726}
3727
3728// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3729// BEGIN TestUserFunction support factory
3730
3731Transliterator* _TUFF[4];
3732UnicodeString* _TUFID[4];
3733
3734static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3735                                   Transliterator::Token context) {
3736    return _TUFF[context.integer]->clone();
3737}
3738
3739static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3740    _TUFF[n] = t;
3741    _TUFID[n] = new UnicodeString(ID);
3742    Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3743}
3744
3745static void _TUFUnreg(int32_t n) {
3746    if (_TUFF[n] != NULL) {
3747        Transliterator::unregister(*_TUFID[n]);
3748        delete _TUFF[n];
3749        delete _TUFID[n];
3750    }
3751}
3752
3753// END TestUserFunction support factory
3754// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3755
3756/**
3757 * Test that user-registered transliterators can be used under function
3758 * syntax.
3759 */
3760void TransliteratorTest::TestUserFunction() {
3761
3762    Transliterator* t;
3763    UParseError pe;
3764    UErrorCode ec = U_ZERO_ERROR;
3765
3766    // Setup our factory
3767    int32_t i;
3768    for (i=0; i<4; ++i) {
3769        _TUFF[i] = NULL;
3770    }
3771
3772    // There's no need to register inverses if we don't use them
3773    t = Transliterator::createFromRules("gif",
3774                                        UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3775                                        UTRANS_FORWARD, pe, ec);
3776    if (t == NULL || U_FAILURE(ec)) {
3777        dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3778        return;
3779    }
3780    _TUFReg("Any-gif", t, 0);
3781
3782    t = Transliterator::createFromRules("RemoveCurly",
3783                                        UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3784                                        UTRANS_FORWARD, pe, ec);
3785    if (t == NULL || U_FAILURE(ec)) {
3786        errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3787        goto FAIL;
3788    }
3789    expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3790    _TUFReg("Any-RemoveCurly", t, 1);
3791
3792    logln("Trying &hex");
3793    t = Transliterator::createFromRules("hex2",
3794                                        "(.) > &hex($1);",
3795                                        UTRANS_FORWARD, pe, ec);
3796    if (t == NULL || U_FAILURE(ec)) {
3797        errln("FAIL: createFromRules");
3798        goto FAIL;
3799    }
3800    logln("Registering");
3801    _TUFReg("Any-hex2", t, 2);
3802    t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3803    if (t == NULL || U_FAILURE(ec)) {
3804        errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3805        goto FAIL;
3806    }
3807    expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3808    delete t;
3809
3810    logln("Trying &gif");
3811    t = Transliterator::createFromRules("gif2",
3812                                        "(.) > &Gif(&Hex2($1));",
3813                                        UTRANS_FORWARD, pe, ec);
3814    if (t == NULL || U_FAILURE(ec)) {
3815        errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3816        goto FAIL;
3817    }
3818    logln("Registering");
3819    _TUFReg("Any-gif2", t, 3);
3820    t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3821    if (t == NULL || U_FAILURE(ec)) {
3822        errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3823        goto FAIL;
3824    }
3825    expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3826           "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3827    delete t;
3828
3829    // Test that filters are allowed after &
3830    t = Transliterator::createFromRules("test",
3831                                        "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3832                                        UTRANS_FORWARD, pe, ec);
3833    if (t == NULL || U_FAILURE(ec)) {
3834        errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3835        goto FAIL;
3836    }
3837    expect(*t, "abc",
3838           UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3839    delete t;
3840
3841 FAIL:
3842    for (i=0; i<4; ++i) {
3843        _TUFUnreg(i);
3844    }
3845}
3846
3847/**
3848 * Test the Any-X transliterators.
3849 */
3850void TransliteratorTest::TestAnyX(void) {
3851    UParseError parseError;
3852    UErrorCode status = U_ZERO_ERROR;
3853    Transliterator* anyLatin =
3854        Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3855    if (anyLatin==0) {
3856        dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3857        delete anyLatin;
3858        return;
3859    }
3860
3861    expect(*anyLatin,
3862           CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3863           CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3864
3865    delete anyLatin;
3866}
3867
3868/**
3869 * Test Any-X transliterators with sample letters from all scripts.
3870 */
3871void TransliteratorTest::TestAny(void) {
3872    UErrorCode status = U_ZERO_ERROR;
3873    // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3874    //       function call parameters going on in this test.
3875    UnicodeSet alphabetic("[:alphabetic:]", status);
3876    if (U_FAILURE(status)) {
3877        dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3878        return;
3879    }
3880    alphabetic.freeze();
3881
3882    UnicodeString testString;
3883    for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3884        const char *scriptName = uscript_getShortName((UScriptCode)i);
3885        if (scriptName == NULL) {
3886            errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3887            return;
3888        }
3889
3890        UnicodeSet sample;
3891        sample.applyPropertyAlias("script", scriptName, status);
3892        if (U_FAILURE(status)) {
3893            errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3894            return;
3895        }
3896        sample.retainAll(alphabetic);
3897        for (int32_t count=0; count<5; count++) {
3898            UChar32 c = sample.charAt(count);
3899            if (c == -1) {
3900                break;
3901            }
3902            testString.append(c);
3903        }
3904    }
3905
3906    UParseError parseError;
3907    Transliterator* anyLatin =
3908        Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3909    if (U_FAILURE(status)) {
3910        dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3911        return;
3912    }
3913
3914    logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3915    anyLatin->transliterate(testString);
3916    logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3917    delete anyLatin;
3918}
3919
3920
3921/**
3922 * Test the source and target set API.  These are only implemented
3923 * for RBT and CompoundTransliterator at this time.
3924 */
3925void TransliteratorTest::TestSourceTargetSet() {
3926    UErrorCode ec = U_ZERO_ERROR;
3927
3928    // Rules
3929    const char* r =
3930        "a > b; "
3931        "r [x{lu}] > q;";
3932
3933    // Expected source
3934    UnicodeSet expSrc("[arx{lu}]", ec);
3935
3936    // Expected target
3937    UnicodeSet expTrg("[bq]", ec);
3938
3939    UParseError pe;
3940    Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3941
3942    if (U_FAILURE(ec)) {
3943        delete t;
3944        errln("FAIL: Couldn't set up test");
3945        return;
3946    }
3947
3948    UnicodeSet src; t->getSourceSet(src);
3949    UnicodeSet trg; t->getTargetSet(trg);
3950
3951    if (src == expSrc && trg == expTrg) {
3952        UnicodeString a, b;
3953        logln((UnicodeString)"Ok: " +
3954              r + " => source = " + src.toPattern(a, TRUE) +
3955              ", target = " + trg.toPattern(b, TRUE));
3956    } else {
3957        UnicodeString a, b, c, d;
3958        errln((UnicodeString)"FAIL: " +
3959              r + " => source = " + src.toPattern(a, TRUE) +
3960              ", expected " + expSrc.toPattern(b, TRUE) +
3961              "; target = " + trg.toPattern(c, TRUE) +
3962              ", expected " + expTrg.toPattern(d, TRUE));
3963    }
3964
3965    delete t;
3966}
3967
3968/**
3969 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3970 */
3971void TransliteratorTest::TestPatternWhiteSpace() {
3972    // Rules
3973    const char* r = "a > \\u200E b;";
3974
3975    UErrorCode ec = U_ZERO_ERROR;
3976    UParseError pe;
3977    Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3978
3979    if (U_FAILURE(ec)) {
3980        errln("FAIL: Couldn't set up test");
3981    } else {
3982        expect(*t, "a", "b");
3983    }
3984    delete t;
3985
3986    // UnicodeSet
3987    ec = U_ZERO_ERROR;
3988    UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3989
3990    if (U_FAILURE(ec)) {
3991        errln("FAIL: Couldn't set up test");
3992    } else {
3993        if (set.contains(0x200E)) {
3994            errln("FAIL: U+200E not being ignored by UnicodeSet");
3995        }
3996    }
3997}
3998//======================================================================
3999// this method is in TestUScript.java
4000//======================================================================
4001void TransliteratorTest::TestAllCodepoints(){
4002    UScriptCode code= USCRIPT_INVALID_CODE;
4003    char id[256]={'\0'};
4004    char abbr[256]={'\0'};
4005    char newId[256]={'\0'};
4006    char newAbbrId[256]={'\0'};
4007    char oldId[256]={'\0'};
4008    char oldAbbrId[256]={'\0'};
4009
4010    UErrorCode status =U_ZERO_ERROR;
4011    UParseError pe;
4012
4013    for(uint32_t i = 0; i<=0x10ffff; i++){
4014        code =  uscript_getScript(i,&status);
4015        if(code == USCRIPT_INVALID_CODE){
4016            dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4017        }
4018        const char* myId = uscript_getName(code);
4019        if(!myId) {
4020          dataerrln("Valid script code returned NULL name. Check your data!");
4021          return;
4022        }
4023        uprv_strcpy(id,myId);
4024        uprv_strcpy(abbr,uscript_getShortName(code));
4025
4026        uprv_strcpy(newId,"[:");
4027        uprv_strcat(newId,id);
4028        uprv_strcat(newId,":];NFD");
4029
4030        uprv_strcpy(newAbbrId,"[:");
4031        uprv_strcat(newAbbrId,abbr);
4032        uprv_strcat(newAbbrId,":];NFD");
4033
4034        if(uprv_strcmp(newId,oldId)!=0){
4035            Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4036            if(t==NULL || U_FAILURE(status)){
4037                dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4038            }
4039            delete t;
4040        }
4041        if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4042            Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4043            if(t==NULL || U_FAILURE(status)){
4044                dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4045            }
4046            delete t;
4047        }
4048        uprv_strcpy(oldId,newId);
4049        uprv_strcpy(oldAbbrId, newAbbrId);
4050
4051    }
4052
4053}
4054
// Create the transliterator registered under `id` and check that its
// dynamic class ID equals the static class ID of class `cls`.
// `id` must be usable as a "%s" argument (a C string).
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode tt_status = U_ZERO_ERROR; \
  Transliterator* tt_instance = Transliterator::createInstance(id, UTRANS_FORWARD, tt_status); \
  if (U_SUCCESS(tt_status)) { \
    if (tt_instance->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } else { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(tt_status)); \
  } \
  delete tt_instance; \
}
4068
// Build a transliterator from `rule` and check that its dynamic class ID
// equals the static class ID of class `cls`.
// `rule` must be a string literal: it is pasted onto the failure message.
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UParseError tt_parseError; \
  UErrorCode tt_status = U_ZERO_ERROR; \
  Transliterator* tt_instance = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, tt_parseError, tt_status); \
  if (U_SUCCESS(tt_status)) { \
    if (tt_instance->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } else { \
    errln("FAIL: Couldn't create " rule); \
  } \
  delete tt_instance; \
}
4083
4084void TransliteratorTest::TestBoilerplate() {
4085    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4086    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4087    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4088    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4089    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4090    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4091    TEST_TRANSLIT_ID("Null", NullTransliterator);
4092    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4093    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4094    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4095    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4096    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4097    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4098}
4099
4100void TransliteratorTest::TestAlternateSyntax() {
4101    // U+2206 == &
4102    // U+2190 == <
4103    // U+2192 == >
4104    // U+2194 == <>
4105    expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4106           "abc",
4107           "xbz");
4108    expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4109           CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4110           UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4111}
4112
// Rule sets used by TestBeginEnd() and TestBeginEndToRules().  Every entry is
// one complete rule set, spelled as adjacent string literals that the compiler
// joins into a single string.  Entries are referred to by array index, so the
// rule sets that rely on the ::BEGIN/::END syntax are kept as "" placeholders
// (with the disabled rules preserved in a comment) instead of being removed.
static const char* BEGIN_END_RULES[] = {
    /* [0] */
    "abc > xy;"
    "aba > z;",

    /* [1] (disabled; the "" below keeps the indexes from shifting)
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
    */
    "",

    /* [2] (disabled; the "" below keeps the indexes from shifting)
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
    */
    "",

    /* [3] (disabled; the "" below keeps the indexes from shifting)
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
    */
    "",

    /* [4] */
    "abc > xy;"
    "::Null;"
    "aba > z;",

    /* [5] */
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    /* [6] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    /* [7] */
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    /* [8] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    /* [9] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    /* [10] (disabled; the "" below keeps the indexes from shifting)
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
    */
    "",

    /* [11] (disabled; the "" below keeps the indexes from shifting)
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
    */
    "",

    /* [12] (disabled; the "" below keeps the indexes from shifting)
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
    */
    "",

    /* [13] */
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    /* [14] (disabled; the "" below keeps the indexes from shifting)
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
    */
    "",

    /* [15] */
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    /* [16] (disabled; the "" below keeps the indexes from shifting)
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
    */
    "",

    /* [17] -- the one reversible rule set; the tests also build it in reverse */
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};

// Number of entries in BEGIN_END_RULES.
static const int32_t BEGIN_END_RULES_length =
    (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4287
4288/*
4289(This entire test is commented out below and will need some heavy revision when we re-add
4290the ::BEGIN/::END stuff)
4291static const char* BOGUS_BEGIN_END_RULES[] = {
4292    // [7]
4293    "::BEGIN;"
4294    "abc > xy;"
4295    "::BEGIN;"
4296    "aba > z;"
4297    "::END;"
4298    "::END;",
4299
4300    // [8]
4301    "abc > xy;"
4302    " aba > z;"
4303    "::END;",
4304
4305    // [9]
4306    "::BEGIN;"
4307    "::Upper;"
4308    "::END;"
4309};
4310static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4311*/
4312
4313static const char* BEGIN_END_TEST_CASES[] = {
4314    // rules             input                   expected output
4315    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4316//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4317//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4318//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4319    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4320    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4321
4322    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4323    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4324    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4325    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4326//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4327//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4328//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4329//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4330//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4331    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4332    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4333    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4334
4335//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4336    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4337//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4338    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4339};
4340static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4341
4342void TransliteratorTest::TestBeginEnd() {
4343    // run through the list of test cases above
4344    int32_t i = 0;
4345    for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4346        expect((UnicodeString)"Test case #" + (i / 3),
4347               UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4348               UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4349               UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4350    }
4351
4352    // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4353    UParseError parseError;
4354    UErrorCode status = U_ZERO_ERROR;
4355    Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4356            UTRANS_REVERSE, parseError, status);
4357    if (reversed == 0 || U_FAILURE(status)) {
4358        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4359    } else {
4360        expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4361    }
4362    delete reversed;
4363
4364    // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4365    // that all of them cause errors
4366/*
4367(commented out until we have the real ::BEGIN/::END stuff in place
4368    for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4369        UParseError parseError;
4370        UErrorCode status = U_ZERO_ERROR;
4371        Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4372                UTRANS_FORWARD, parseError, status);
4373        if (!U_FAILURE(status)) {
4374            delete t;
4375            errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4376        }
4377    }
4378*/
4379}
4380
4381void TransliteratorTest::TestBeginEndToRules() {
4382    // run through the same list of test cases we used above, but this time, instead of just
4383    // instantiating a Transliterator from the rules and running the test against it, we instantiate
4384    // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4385    // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4386    // to (i.e., does the same thing as) the original rule set
4387    for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4388        UParseError parseError;
4389        UErrorCode status = U_ZERO_ERROR;
4390        Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4391                UTRANS_FORWARD, parseError, status);
4392        if (U_FAILURE(status)) {
4393            reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4394        } else {
4395            UnicodeString rules;
4396            t->toRules(rules, TRUE);
4397            Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4398                    UTRANS_FORWARD, parseError, status);
4399            if (U_FAILURE(status)) {
4400                reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4401                        parseError, status);
4402                delete t;
4403            } else {
4404                expect(*t2,
4405                       UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4406                       UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4407                delete t;
4408                delete t2;
4409            }
4410        }
4411    }
4412
4413    // do the same thing for the reversible test case
4414    UParseError parseError;
4415    UErrorCode status = U_ZERO_ERROR;
4416    Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4417            UTRANS_REVERSE, parseError, status);
4418    if (U_FAILURE(status)) {
4419        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4420    } else {
4421        UnicodeString rules;
4422        reversed->toRules(rules, FALSE);
4423        Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4424                parseError, status);
4425        if (U_FAILURE(status)) {
4426            reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4427                    parseError, status);
4428            delete reversed;
4429        } else {
4430            expect(*reversed2,
4431                   UnicodeString("xy XY XYZ yz YZ"),
4432                   UnicodeString("xy abc xaba yz aba"));
4433            delete reversed;
4434            delete reversed2;
4435        }
4436    }
4437}
4438
4439void TransliteratorTest::TestRegisterAlias() {
4440    UnicodeString longID("Lower;[aeiou]Upper");
4441    UnicodeString shortID("Any-CapVowels");
4442    UnicodeString reallyShortID("CapVowels");
4443
4444    Transliterator::registerAlias(shortID, longID);
4445
4446    UErrorCode err = U_ZERO_ERROR;
4447    Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4448    if (U_FAILURE(err)) {
4449        errln("Failed to instantiate transliterator with long ID");
4450        Transliterator::unregister(shortID);
4451        return;
4452    }
4453    Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4454    if (U_FAILURE(err)) {
4455        errln("Failed to instantiate transliterator with short ID");
4456        delete t1;
4457        Transliterator::unregister(shortID);
4458        return;
4459    }
4460
4461    if (t1->getID() != longID)
4462        errln("Transliterator instantiated with long ID doesn't have long ID");
4463    if (t2->getID() != reallyShortID)
4464        errln("Transliterator instantiated with short ID doesn't have short ID");
4465
4466    UnicodeString rules1;
4467    UnicodeString rules2;
4468
4469    t1->toRules(rules1, TRUE);
4470    t2->toRules(rules2, TRUE);
4471    if (rules1 != rules2)
4472        errln("Alias transliterators aren't the same");
4473
4474    delete t1;
4475    delete t2;
4476    Transliterator::unregister(shortID);
4477
4478    t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4479    if (U_SUCCESS(err)) {
4480        errln("Instantiation with short ID succeeded after short ID was unregistered");
4481        delete t1;
4482    }
4483
4484    // try the same thing again, but this time with something other than
4485    // an instance of CompoundTransliterator
4486    UnicodeString realID("Latin-Greek");
4487    UnicodeString fakeID("Latin-dlgkjdflkjdl");
4488    Transliterator::registerAlias(fakeID, realID);
4489
4490    err = U_ZERO_ERROR;
4491    t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4492    if (U_FAILURE(err)) {
4493        dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4494        Transliterator::unregister(realID);
4495        return;
4496    }
4497    t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4498    if (U_FAILURE(err)) {
4499        errln("Failed to instantiate transliterator with fake ID");
4500        delete t1;
4501        Transliterator::unregister(realID);
4502        return;
4503    }
4504
4505    t1->toRules(rules1, TRUE);
4506    t2->toRules(rules2, TRUE);
4507    if (rules1 != rules2)
4508        errln("Alias transliterators aren't the same");
4509
4510    delete t1;
4511    delete t2;
4512    Transliterator::unregister(fakeID);
4513}
4514
4515void TransliteratorTest::TestRuleStripping() {
4516    /*
4517#
4518\uE001>\u0C01; # SIGN
4519    */
4520    static const UChar rule[] = {
4521        0x0023,0x0020,0x000D,0x000A,
4522        0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4523    };
4524    static const UChar expectedRule[] = {
4525        0xE001,0x003E,0x0C01,0x003B,0
4526    };
4527    UChar result[sizeof(rule)/sizeof(rule[0])];
4528    UErrorCode status = U_ZERO_ERROR;
4529    int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4530    if (len != u_strlen(expectedRule)) {
4531        errln("utrans_stripRules return len = %d", len);
4532    }
4533    if (u_strncmp(expectedRule, result, len) != 0) {
4534        errln("utrans_stripRules did not return expected string");
4535    }
4536}
4537
4538/**
4539 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4540 */
4541void TransliteratorTest::TestHalfwidthFullwidth(void) {
4542    UParseError parseError;
4543    UErrorCode status = U_ZERO_ERROR;
4544    Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4545    Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4546    if (hf == 0 || fh == 0) {
4547        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4548        delete hf;
4549        delete fh;
4550        return;
4551    }
4552
4553    // Array of 2n items
4554    // Each item is
4555    //   "hf"|"fh"|"both",
4556    //   <Halfwidth>,
4557    //   <Fullwidth>
4558    const char* DATA[] = {
4559        "both",
4560        "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4561        "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4562    };
4563    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4564
4565    for (int32_t i=0; i<DATA_length; i+=3) {
4566        UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4567        UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4568        switch (*DATA[i]) {
4569        case 0x68: //'h': // Halfwidth-Fullwidth only
4570            expect(*hf, h, f);
4571            break;
4572        case 0x66: //'f': // Fullwidth-Halfwidth only
4573            expect(*fh, f, h);
4574            break;
4575        case 0x62: //'b': // both directions
4576            expect(*hf, h, f);
4577            expect(*fh, f, h);
4578            break;
4579        }
4580    }
4581    delete hf;
4582    delete fh;
4583}
4584
4585
4586    /**
4587     *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4588     *              TODO: confirm that the expected results are correct.
4589     *              For now, test just confirms that C++ and Java give identical results.
4590     */
4591void TransliteratorTest::TestThai(void) {
4592#if !UCONFIG_NO_BREAK_ITERATION
4593    UParseError parseError;
4594    UErrorCode status = U_ZERO_ERROR;
4595    Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4596    if (tr == 0) {
4597        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4598        return;
4599    }
4600    if (U_FAILURE(status)) {
4601        errln("FAIL: createInstance failed with %s", u_errorName(status));
4602        return;
4603    }
4604    const char *thaiText =
4605        "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4606        "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4607        "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4608        "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4609        "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4610        "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4611        "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4612        "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4613        "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4614        "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4615        "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4616        "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4617        "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4618        "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4619        "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4620        "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4621        "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4622        "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4623        "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4624        "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4625        "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4626        "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4627        "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4628        "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4629        " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4630        "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4631        "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4632        " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4633        "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4634        "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4635
4636    const char *latinText =
4637        "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4638        "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4639        "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4640        "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4641        "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4642        " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4643        "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4644        "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4645        "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4646        "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4647        "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4648        "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4649        " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4650        "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4651        " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4652        "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4653        "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4654        "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4655
4656
4657    UnicodeString  xlitText(thaiText);
4658    xlitText = xlitText.unescape();
4659    tr->transliterate(xlitText);
4660
4661    UnicodeString expectedText(latinText);
4662    expectedText = expectedText.unescape();
4663    expect(*tr, xlitText, expectedText);
4664
4665    delete tr;
4666#endif
4667}
4668
4669
4670//======================================================================
4671// Support methods
4672//======================================================================
4673void TransliteratorTest::expectT(const UnicodeString& id,
4674                                 const UnicodeString& source,
4675                                 const UnicodeString& expectedResult) {
4676    UErrorCode ec = U_ZERO_ERROR;
4677    UParseError pe;
4678    Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4679    if (U_FAILURE(ec)) {
4680        errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4681        delete t;
4682        return;
4683    }
4684    expect(*t, source, expectedResult);
4685    delete t;
4686}
4687
4688void TransliteratorTest::reportParseError(const UnicodeString& message,
4689                                          const UParseError& parseError,
4690                                          const UErrorCode& status) {
4691    dataerrln(message +
4692          /*", parse error " + parseError.code +*/
4693          ", line " + parseError.line +
4694          ", offset " + parseError.offset +
4695          ", pre-context " + prettify(parseError.preContext, TRUE) +
4696          ", post-context " + prettify(parseError.postContext,TRUE) +
4697          ", Error: " + u_errorName(status));
4698}
4699
4700void TransliteratorTest::expect(const UnicodeString& rules,
4701                                const UnicodeString& source,
4702                                const UnicodeString& expectedResult,
4703                                UTransPosition *pos) {
4704    expect("<ID>", rules, source, expectedResult, pos);
4705}
4706
4707void TransliteratorTest::expect(const UnicodeString& id,
4708                                const UnicodeString& rules,
4709                                const UnicodeString& source,
4710                                const UnicodeString& expectedResult,
4711                                UTransPosition *pos) {
4712    UErrorCode status = U_ZERO_ERROR;
4713    UParseError parseError;
4714    Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4715    if (U_FAILURE(status)) {
4716        reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4717    } else {
4718        expect(*t, source, expectedResult, pos);
4719    }
4720    delete t;
4721}
4722
4723void TransliteratorTest::expect(const Transliterator& t,
4724                                const UnicodeString& source,
4725                                const UnicodeString& expectedResult,
4726                                const Transliterator& reverseTransliterator) {
4727    expect(t, source, expectedResult);
4728    expect(reverseTransliterator, expectedResult, source);
4729}
4730
4731void TransliteratorTest::expect(const Transliterator& t,
4732                                const UnicodeString& source,
4733                                const UnicodeString& expectedResult,
4734                                UTransPosition *pos) {
4735    if (pos == 0) {
4736        UnicodeString result(source);
4737        t.transliterate(result);
4738        expectAux(t.getID() + ":String", source, result, expectedResult);
4739    }
4740    UTransPosition index={0, 0, 0, 0};
4741    if (pos != 0) {
4742        index = *pos;
4743    }
4744
4745    UnicodeString rsource(source);
4746    if (pos == 0) {
4747        t.transliterate(rsource);
4748    } else {
4749        // Do it all at once -- below we do it incrementally
4750        t.finishTransliteration(rsource, *pos);
4751    }
4752    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4753
4754    // Test keyboard (incremental) transliteration -- this result
4755    // must be the same after we finalize (see below).
4756    UnicodeString log;
4757    rsource.remove();
4758    if (pos != 0) {
4759        rsource = source;
4760        formatInput(log, rsource, index);
4761        log.append(" -> ");
4762        UErrorCode status = U_ZERO_ERROR;
4763        t.transliterate(rsource, index, status);
4764        formatInput(log, rsource, index);
4765    } else {
4766        for (int32_t i=0; i<source.length(); ++i) {
4767            if (i != 0) {
4768                log.append(" + ");
4769            }
4770            log.append(source.charAt(i)).append(" -> ");
4771            UErrorCode status = U_ZERO_ERROR;
4772            t.transliterate(rsource, index, source.charAt(i), status);
4773            formatInput(log, rsource, index);
4774        }
4775    }
4776
4777    // As a final step in keyboard transliteration, we must call
4778    // transliterate to finish off any pending partial matches that
4779    // were waiting for more input.
4780    t.finishTransliteration(rsource, index);
4781    log.append(" => ").append(rsource);
4782
4783    expectAux(t.getID() + ":Keyboard", log,
4784              rsource == expectedResult,
4785              expectedResult);
4786}
4787
4788
4789/**
4790 * @param appendTo result is appended to this param.
4791 * @param input the string being transliterated
4792 * @param pos the index struct
4793 */
4794UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4795                                               const UnicodeString& input,
4796                                               const UTransPosition& pos) {
4797    // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4798    // the {} indicate the context start and limit, and the ||
4799    // indicate the start and limit.
4800    if (0 <= pos.contextStart &&
4801        pos.contextStart <= pos.start &&
4802        pos.start <= pos.limit &&
4803        pos.limit <= pos.contextLimit &&
4804        pos.contextLimit <= input.length()) {
4805
4806        UnicodeString a, b, c, d, e;
4807        input.extractBetween(0, pos.contextStart, a);
4808        input.extractBetween(pos.contextStart, pos.start, b);
4809        input.extractBetween(pos.start, pos.limit, c);
4810        input.extractBetween(pos.limit, pos.contextLimit, d);
4811        input.extractBetween(pos.contextLimit, input.length(), e);
4812        appendTo.append(a).append((UChar)123/*{*/).append(b).
4813            append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4814            append((UChar)125/*}*/).append(e);
4815    } else {
4816        appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4817                        pos.contextStart + ", s=" + pos.start + ", l=" +
4818                        pos.limit + ", cl=" + pos.contextLimit + "} on " +
4819                        input);
4820    }
4821    return appendTo;
4822}
4823
4824void TransliteratorTest::expectAux(const UnicodeString& tag,
4825                                   const UnicodeString& source,
4826                                   const UnicodeString& result,
4827                                   const UnicodeString& expectedResult) {
4828    expectAux(tag, source + " -> " + result,
4829              result == expectedResult,
4830              expectedResult);
4831}
4832
4833void TransliteratorTest::expectAux(const UnicodeString& tag,
4834                                   const UnicodeString& summary, UBool pass,
4835                                   const UnicodeString& expectedResult) {
4836    if (pass) {
4837        logln(UnicodeString("(")+tag+") " + prettify(summary));
4838    } else {
4839        dataerrln(UnicodeString("FAIL: (")+tag+") "
4840              + prettify(summary)
4841              + ", expected " + prettify(expectedResult));
4842    }
4843}
4844
4845#endif /* #if !UCONFIG_NO_TRANSLITERATION */
4846