1/*
2**********************************************************************
3*   Copyright (C) 1999-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   11/10/99    aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "transtst.h"
16#include "unicode/locid.h"
17#include "unicode/dtfmtsym.h"
18#include "unicode/normlzr.h"
19#include "unicode/translit.h"
20#include "unicode/uchar.h"
21#include "unicode/unifilt.h"
22#include "unicode/uniset.h"
23#include "unicode/ustring.h"
24#include "unicode/usetiter.h"
25#include "unicode/uscript.h"
26#include "unicode/utf16.h"
27#include "cpdtrans.h"
28#include "nultrans.h"
29#include "rbt.h"
30#include "rbt_pars.h"
31#include "anytrans.h"
32#include "esctrn.h"
33#include "name2uni.h"
34#include "nortrans.h"
35#include "remtrans.h"
36#include "titletrn.h"
37#include "tolowtrn.h"
38#include "toupptrn.h"
39#include "unesctrn.h"
40#include "uni2name.h"
41#include "cstring.h"
42#include "cmemory.h"
43#include <stdio.h>
44
45/***********************************************************************
46
47                     HOW TO USE THIS TEST FILE
48                               -or-
49                  How I developed on two platforms
50                without losing (too much of) my mind
51
52
531. Add new tests by copying/pasting/changing existing tests.  On Java,
54   any public void method named Test...() taking no parameters becomes
55   a test.  On C++, you need to modify the header and add a line to
56   the runIndexedTest() dispatch method.
57
582. Make liberal use of the expect() method; it is your friend.
59
603. The tests in this file exactly match those in a sister file on the
61   other side.  The two files are:
62
63   icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
64   icu4c:  source/test/intltest/transtst.cpp
65
66                  ==> THIS IS THE IMPORTANT PART <==
67
68   When you add a test in this file, add it in TransliteratorTest.java
69   too.  Give it the same name and put it in the same relative place.
70   This makes maintenance a lot simpler for any poor soul who ends up
71   trying to synchronize the tests between icu4j and icu4c.
72
734. If you MUST enter a test that is NOT paralleled in the sister file,
74   then add it in the special non-mirrored section.  These are
75   labeled
76
77     "icu4j ONLY"
78
79   or
80
81     "icu4c ONLY"
82
83   Make sure you document the reason the test is here and not there.
84
85
86Thank you.
87The Management
88***********************************************************************/
89
90// Define character constants thusly to be EBCDIC-friendly
91enum {
92    LEFT_BRACE=((UChar)0x007B), /*{*/
93    PIPE      =((UChar)0x007C), /*|*/
94    ZERO      =((UChar)0x0030), /*0*/
95    UPPER_A   =((UChar)0x0041)  /*A*/
96};
97
98TransliteratorTest::TransliteratorTest()
99:   DESERET_DEE((UChar32)0x10414),
100    DESERET_dee((UChar32)0x1043C)
101{
102}
103
104TransliteratorTest::~TransliteratorTest() {}
105
106void
107TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
108                                   const char* &name, char* /*par*/) {
109    switch (index) {
110        TESTCASE(0,TestInstantiation);
111        TESTCASE(1,TestSimpleRules);
112        TESTCASE(2,TestRuleBasedInverse);
113        TESTCASE(3,TestKeyboard);
114        TESTCASE(4,TestKeyboard2);
115        TESTCASE(5,TestKeyboard3);
116        TESTCASE(6,TestArabic);
117        TESTCASE(7,TestCompoundKana);
118        TESTCASE(8,TestCompoundHex);
119        TESTCASE(9,TestFiltering);
120        TESTCASE(10,TestInlineSet);
121        TESTCASE(11,TestPatternQuoting);
122        TESTCASE(12,TestJ277);
123        TESTCASE(13,TestJ243);
124        TESTCASE(14,TestJ329);
125        TESTCASE(15,TestSegments);
126        TESTCASE(16,TestCursorOffset);
127        TESTCASE(17,TestArbitraryVariableValues);
128        TESTCASE(18,TestPositionHandling);
129        TESTCASE(19,TestHiraganaKatakana);
130        TESTCASE(20,TestCopyJ476);
131        TESTCASE(21,TestAnchors);
132        TESTCASE(22,TestInterIndic);
133        TESTCASE(23,TestFilterIDs);
134        TESTCASE(24,TestCaseMap);
135        TESTCASE(25,TestNameMap);
136        TESTCASE(26,TestLiberalizedID);
137        TESTCASE(27,TestCreateInstance);
138        TESTCASE(28,TestNormalizationTransliterator);
139        TESTCASE(29,TestCompoundRBT);
140        TESTCASE(30,TestCompoundFilter);
141        TESTCASE(31,TestRemove);
142        TESTCASE(32,TestToRules);
143        TESTCASE(33,TestContext);
144        TESTCASE(34,TestSupplemental);
145        TESTCASE(35,TestQuantifier);
146        TESTCASE(36,TestSTV);
147        TESTCASE(37,TestCompoundInverse);
148        TESTCASE(38,TestNFDChainRBT);
149        TESTCASE(39,TestNullInverse);
150        TESTCASE(40,TestAliasInverseID);
151        TESTCASE(41,TestCompoundInverseID);
152        TESTCASE(42,TestUndefinedVariable);
153        TESTCASE(43,TestEmptyContext);
154        TESTCASE(44,TestCompoundFilterID);
155        TESTCASE(45,TestPropertySet);
156        TESTCASE(46,TestNewEngine);
157        TESTCASE(47,TestQuantifiedSegment);
158        TESTCASE(48,TestDevanagariLatinRT);
159        TESTCASE(49,TestTeluguLatinRT);
160        TESTCASE(50,TestCompoundLatinRT);
161        TESTCASE(51,TestSanskritLatinRT);
162        TESTCASE(52,TestLocaleInstantiation);
163        TESTCASE(53,TestTitleAccents);
164        TESTCASE(54,TestLocaleResource);
165        TESTCASE(55,TestParseError);
166        TESTCASE(56,TestOutputSet);
167        TESTCASE(57,TestVariableRange);
168        TESTCASE(58,TestInvalidPostContext);
169        TESTCASE(59,TestIDForms);
170        TESTCASE(60,TestToRulesMark);
171        TESTCASE(61,TestEscape);
172        TESTCASE(62,TestAnchorMasking);
173        TESTCASE(63,TestDisplayName);
174        TESTCASE(64,TestSpecialCases);
175#if !UCONFIG_NO_FILE_IO
176        TESTCASE(65,TestIncrementalProgress);
177#endif
178        TESTCASE(66,TestSurrogateCasing);
179        TESTCASE(67,TestFunction);
180        TESTCASE(68,TestInvalidBackRef);
181        TESTCASE(69,TestMulticharStringSet);
182        TESTCASE(70,TestUserFunction);
183        TESTCASE(71,TestAnyX);
184        TESTCASE(72,TestSourceTargetSet);
185        TESTCASE(73,TestGurmukhiDevanagari);
186        TESTCASE(74,TestPatternWhiteSpace);
187        TESTCASE(75,TestAllCodepoints);
188        TESTCASE(76,TestBoilerplate);
189        TESTCASE(77,TestAlternateSyntax);
190        TESTCASE(78,TestBeginEnd);
191        TESTCASE(79,TestBeginEndToRules);
192        TESTCASE(80,TestRegisterAlias);
193        TESTCASE(81,TestRuleStripping);
194        TESTCASE(82,TestHalfwidthFullwidth);
195        TESTCASE(83,TestThai);
196        TESTCASE(84,TestAny);
197        default: name = ""; break;
198    }
199}
200
201/**
202 * Make sure every system transliterator can be instantiated.
203 *
204 * ALSO test that the result of toRules() for each rule is a valid
205 * rule.  Do this here so we don't have to have another test that
206 * instantiates everything as well.
207 */
208void TransliteratorTest::TestInstantiation() {
209    UErrorCode ec = U_ZERO_ERROR;
210    StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
211    assertSuccess("getAvailableIDs()", ec);
212    assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
213    int32_t n = Transliterator::countAvailableIDs();
214    assertTrue("getAvailableIDs().count()==countAvailableIDs()",
215               avail->count(ec) == n);
216    assertSuccess("count()", ec);
217    UnicodeString name;
218    for (int32_t i=0; i<n; ++i) {
219        const UnicodeString& id = *avail->snext(ec);
220        if (!assertSuccess("snext()", ec) ||
221            !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
222            break;
223        }
224        UnicodeString id2 = Transliterator::getAvailableID(i);
225        if (id.length() < 1) {
226            errln(UnicodeString("FAIL: getAvailableID(") +
227                  i + ") returned empty string");
228            continue;
229        }
230        if (id != id2) {
231            errln(UnicodeString("FAIL: getAvailableID(") +
232                  i + ") != getAvailableIDs().snext()");
233            continue;
234        }
235        UParseError parseError;
236        UErrorCode status = U_ZERO_ERROR;
237        Transliterator* t = Transliterator::createInstance(id,
238                              UTRANS_FORWARD, parseError,status);
239        name.truncate(0);
240        Transliterator::getDisplayName(id, name);
241        if (t == 0) {
242#if UCONFIG_NO_BREAK_ITERATION
243            // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
244            if (id.compare((UnicodeString)"Thai-Latin") != 0)
245#endif
246                dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
247                      /*", parse error " + parseError.code +*/
248                      ", line " + parseError.line +
249                      ", offset " + parseError.offset +
250                      ", pre-context " + prettify(parseError.preContext, TRUE) +
251                      ", post-context " +prettify(parseError.postContext,TRUE) +
252                      ", Error: " + u_errorName(status));
253                // When createInstance fails, it deletes the failing
254                // entry from the available ID list.  We detect this
255                // here by looking for a change in countAvailableIDs.
256            int32_t nn = Transliterator::countAvailableIDs();
257            if (nn == (n - 1)) {
258                n = nn;
259                --i; // Compensate for deleted entry
260            }
261        } else {
262            logln(UnicodeString("OK: ") + name + " (" + id + ")");
263
264            // Now test toRules
265            UnicodeString rules;
266            t->toRules(rules, TRUE);
267            Transliterator *u = Transliterator::createFromRules("x",
268                                    rules, UTRANS_FORWARD, parseError,status);
269            if (u == 0) {
270                errln(UnicodeString("FAIL: ") + id +
271                      ".createFromRules() => bad rules" +
272                      /*", parse error " + parseError.code +*/
273                      ", line " + parseError.line +
274                      ", offset " + parseError.offset +
275                      ", context " + prettify(parseError.preContext, TRUE) +
276                      ", rules: " + prettify(rules, TRUE));
277            } else {
278                delete u;
279            }
280            delete t;
281        }
282    }
283    assertTrue("snext()==NULL", avail->snext(ec)==NULL);
284    assertSuccess("snext()", ec);
285    delete avail;
286
287    // Now test the failure path
288    UParseError parseError;
289    UErrorCode status = U_ZERO_ERROR;
290    UnicodeString id("<Not a valid Transliterator ID>");
291    Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
292    if (t != 0) {
293        errln("FAIL: " + id + " returned a transliterator");
294        delete t;
295    } else {
296        logln("OK: Bogus ID handled properly");
297    }
298}
299
300void TransliteratorTest::TestSimpleRules(void) {
301    /* Example: rules 1. ab>x|y
302     *                2. yc>z
303     *
304     * []|eabcd  start - no match, copy e to tranlated buffer
305     * [e]|abcd  match rule 1 - copy output & adjust cursor
306     * [ex|y]cd  match rule 2 - copy output & adjust cursor
307     * [exz]|d   no match, copy d to transliterated buffer
308     * [exzd]|   done
309     */
310    expect(UnicodeString("ab>x|y;", "") +
311           "yc>z",
312           "eabcd", "exzd");
313
314    /* Another set of rules:
315     *    1. ab>x|yzacw
316     *    2. za>q
317     *    3. qc>r
318     *    4. cw>n
319     *
320     * []|ab       Rule 1
321     * [x|yzacw]   No match
322     * [xy|zacw]   Rule 2
323     * [xyq|cw]    Rule 4
324     * [xyqn]|     Done
325     */
326    expect(UnicodeString("ab>x|yzacw;") +
327           "za>q;" +
328           "qc>r;" +
329           "cw>n",
330           "ab", "xyqn");
331
332    /* Test categories
333     */
334    UErrorCode status = U_ZERO_ERROR;
335    UParseError parseError;
336    Transliterator *t = Transliterator::createFromRules(
337        "<ID>",
338        UnicodeString("$dummy=").append((UChar)0xE100) +
339        UnicodeString(";"
340                      "$vowel=[aeiouAEIOU];"
341                      "$lu=[:Lu:];"
342                      "$vowel } $lu > '!';"
343                      "$vowel > '&';"
344                      "'!' { $lu > '^';"
345                      "$lu > '*';"
346                      "a > ERROR", ""),
347        UTRANS_FORWARD, parseError,
348        status);
349    if (U_FAILURE(status)) {
350        dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
351        return;
352    }
353    expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
354    delete t;
355}
356
357/**
358 * Test inline set syntax and set variable syntax.
359 */
360void TransliteratorTest::TestInlineSet(void) {
361    expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
362    expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
363
364    expect(UnicodeString(
365           "$digit = [0-9];"
366           "$alpha = [a-zA-Z];"
367           "$alphanumeric = [$digit $alpha];" // ***
368           "$special = [^$alphanumeric];"     // ***
369           "$alphanumeric > '-';"
370           "$special > '*';", ""),
371
372           "thx-1138", "---*----");
373}
374
375/**
376 * Create some inverses and confirm that they work.  We have to be
377 * careful how we do this, since the inverses will not be true
378 * inverses -- we can't throw any random string at the composition
379 * of the transliterators and expect the identity function.  F x
380 * F' != I.  However, if we are careful about the input, we will
381 * get the expected results.
382 */
383void TransliteratorTest::TestRuleBasedInverse(void) {
384    UnicodeString RULES =
385        UnicodeString("abc>zyx;") +
386        "ab>yz;" +
387        "bc>zx;" +
388        "ca>xy;" +
389        "a>x;" +
390        "b>y;" +
391        "c>z;" +
392
393        "abc<zyx;" +
394        "ab<yz;" +
395        "bc<zx;" +
396        "ca<xy;" +
397        "a<x;" +
398        "b<y;" +
399        "c<z;" +
400
401        "";
402
403    const char* DATA[] = {
404        // Careful here -- random strings will not work.  If we keep
405        // the left side to the domain and the right side to the range
406        // we will be okay though (left, abc; right xyz).
407        "a", "x",
408        "abcacab", "zyxxxyy",
409        "caccb", "xyzzy",
410    };
411
412    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
413
414    UErrorCode status = U_ZERO_ERROR;
415    UParseError parseError;
416    Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
417                                UTRANS_FORWARD, parseError, status);
418    Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
419                                UTRANS_REVERSE, parseError, status);
420    if (U_FAILURE(status)) {
421        errln("FAIL: RBT constructor failed");
422        return;
423    }
424    for (int32_t i=0; i<DATA_length; i+=2) {
425        expect(*fwd, DATA[i], DATA[i+1]);
426        expect(*rev, DATA[i+1], DATA[i]);
427    }
428    delete fwd;
429    delete rev;
430}
431
432/**
433 * Basic test of keyboard.
434 */
435void TransliteratorTest::TestKeyboard(void) {
436    UParseError parseError;
437    UErrorCode status = U_ZERO_ERROR;
438    Transliterator *t = Transliterator::createFromRules("<ID>",
439                              UnicodeString("psch>Y;")
440                              +"ps>y;"
441                              +"ch>x;"
442                              +"a>A;",
443                              UTRANS_FORWARD, parseError,
444                              status);
445    if (U_FAILURE(status)) {
446        errln("FAIL: RBT constructor failed");
447        return;
448    }
449    const char* DATA[] = {
450        // insertion, buffer
451        "a", "A",
452        "p", "Ap",
453        "s", "Aps",
454        "c", "Apsc",
455        "a", "AycA",
456        "psch", "AycAY",
457        0, "AycAY", // null means finishKeyboardTransliteration
458    };
459
460    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
461    delete t;
462}
463
464/**
465 * Basic test of keyboard with cursor.
466 */
467void TransliteratorTest::TestKeyboard2(void) {
468    UParseError parseError;
469    UErrorCode status = U_ZERO_ERROR;
470    Transliterator *t = Transliterator::createFromRules("<ID>",
471                              UnicodeString("ych>Y;")
472                              +"ps>|y;"
473                              +"ch>x;"
474                              +"a>A;",
475                              UTRANS_FORWARD, parseError,
476                              status);
477    if (U_FAILURE(status)) {
478        errln("FAIL: RBT constructor failed");
479        return;
480    }
481    const char* DATA[] = {
482        // insertion, buffer
483        "a", "A",
484        "p", "Ap",
485        "s", "Aps", // modified for rollback - "Ay",
486        "c", "Apsc", // modified for rollback - "Ayc",
487        "a", "AycA",
488        "p", "AycAp",
489        "s", "AycAps", // modified for rollback - "AycAy",
490        "c", "AycApsc", // modified for rollback - "AycAyc",
491        "h", "AycAY",
492        0, "AycAY", // null means finishKeyboardTransliteration
493    };
494
495    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
496    delete t;
497}
498
499/**
500 * Test keyboard transliteration with back-replacement.
501 */
502void TransliteratorTest::TestKeyboard3(void) {
503    // We want th>z but t>y.  Furthermore, during keyboard
504    // transliteration we want t>y then yh>z if t, then h are
505    // typed.
506    UnicodeString RULES("t>|y;"
507                        "yh>z;");
508
509    const char* DATA[] = {
510        // Column 1: characters to add to buffer (as if typed)
511        // Column 2: expected appearance of buffer after
512        //           keyboard xliteration.
513        "a", "a",
514        "b", "ab",
515        "t", "abt", // modified for rollback - "aby",
516        "c", "abyc",
517        "t", "abyct", // modified for rollback - "abycy",
518        "h", "abycz",
519        0, "abycz", // null means finishKeyboardTransliteration
520    };
521
522    UParseError parseError;
523    UErrorCode status = U_ZERO_ERROR;
524    Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
525    if (U_FAILURE(status)) {
526        errln("FAIL: RBT constructor failed");
527        return;
528    }
529    keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
530    delete t;
531}
532
533void TransliteratorTest::keyboardAux(const Transliterator& t,
534                                     const char* DATA[], int32_t DATA_length) {
535    UErrorCode status = U_ZERO_ERROR;
536    UTransPosition index={0, 0, 0, 0};
537    UnicodeString s;
538    for (int32_t i=0; i<DATA_length; i+=2) {
539        UnicodeString log;
540        if (DATA[i] != 0) {
541            log = s + " + "
542                + DATA[i]
543                + " -> ";
544            t.transliterate(s, index, DATA[i], status);
545        } else {
546            log = s + " => ";
547            t.finishTransliteration(s, index);
548        }
549        // Show the start index '{' and the cursor '|'
550        UnicodeString a, b, c;
551        s.extractBetween(0, index.contextStart, a);
552        s.extractBetween(index.contextStart, index.start, b);
553        s.extractBetween(index.start, s.length(), c);
554        log.append(a).
555            append((UChar)LEFT_BRACE).
556            append(b).
557            append((UChar)PIPE).
558            append(c);
559        if (s == DATA[i+1] && U_SUCCESS(status)) {
560            logln(log);
561        } else {
562            errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
563        }
564    }
565}
566
567void TransliteratorTest::TestArabic(void) {
568// Test disabled for 2.0 until new Arabic transliterator can be written.
569//    /*
570//    const char* DATA[] = {
571//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
572//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
573//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
574//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
575//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
576//                  "\u062c\u0645\u064a\u0644\u0629",
577//    };
578//    */
579//
580//    UChar ar_raw[] = {
581//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
582//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
583//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
584//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
585//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
586//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
587//    };
588//    UnicodeString ar(ar_raw);
589//    UErrorCode status=U_ZERO_ERROR;
590//    UParseError parseError;
591//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
592//    if (t == 0) {
593//        errln("FAIL: createInstance failed");
594//        return;
595//    }
596//    expect(*t, "Arabic", ar);
597//    delete t;
598}
599
600/**
601 * Compose the Kana transliterator forward and reverse and try
602 * some strings that should come out unchanged.
603 */
604void TransliteratorTest::TestCompoundKana(void) {
605    UParseError parseError;
606    UErrorCode status = U_ZERO_ERROR;
607    Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
608    if (t == 0) {
609        dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
610    } else {
611        expect(*t, "aaaaa", "aaaaa");
612        delete t;
613    }
614}
615
616/**
617 * Compose the hex transliterators forward and reverse.
618 */
619void TransliteratorTest::TestCompoundHex(void) {
620    UParseError parseError;
621    UErrorCode status = U_ZERO_ERROR;
622    Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
623    Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
624    Transliterator* transab[] = { a, b };
625    Transliterator* transba[] = { b, a };
626    if (a == 0 || b == 0) {
627        errln("FAIL: construction failed");
628        delete a;
629        delete b;
630        return;
631    }
632    // Do some basic tests of a
633    expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
634    // Do some basic tests of b
635    expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
636
637    Transliterator* ab = new CompoundTransliterator(transab, 2);
638    UnicodeString s("abcde", "");
639    expect(*ab, s, s);
640
641    UnicodeString str(s);
642    a->transliterate(str);
643    Transliterator* ba = new CompoundTransliterator(transba, 2);
644    expect(*ba, str, str);
645
646    delete ab;
647    delete ba;
648    delete a;
649    delete b;
650}
651
652int gTestFilterClassID = 0;
653/**
654 * Used by TestFiltering().
655 */
656class TestFilter : public UnicodeFilter {
657    virtual UnicodeFunctor* clone() const {
658        return new TestFilter(*this);
659    }
660    virtual UBool contains(UChar32 c) const {
661        return c != (UChar)0x0063 /*c*/;
662    }
663    // Stubs
664    virtual UnicodeString& toPattern(UnicodeString& result,
665                                     UBool /*escapeUnprintable*/) const {
666        return result;
667    }
668    virtual UBool matchesIndexValue(uint8_t /*v*/) const {
669        return FALSE;
670    }
671    virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
672public:
673    UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
674};
675
676/**
677 * Do some basic tests of filtering.
678 */
679void TransliteratorTest::TestFiltering(void) {
680    UParseError parseError;
681    UErrorCode status = U_ZERO_ERROR;
682    Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
683    if (hex == 0) {
684        errln("FAIL: createInstance(Any-Hex) failed");
685        return;
686    }
687    hex->adoptFilter(new TestFilter());
688    UnicodeString s("abcde");
689    hex->transliterate(s);
690    UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
691    if (s == exp) {
692        logln(UnicodeString("Ok:   \"") + exp + "\"");
693    } else {
694        logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
695    }
696
697    // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
698    UnicodeFilter *f = hex->orphanFilter();
699    if (f == NULL){
700        errln("FAIL: orphanFilter() should get a UnicodeFilter");
701    } else {
702        delete f;
703    }
704    delete hex;
705}
706
707/**
708 * Test anchors
709 */
710void TransliteratorTest::TestAnchors(void) {
711    expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
712           "aaa",
713           "012");
714    expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
715           "aaa",
716           "012");
717    expect(UnicodeString("^ab  > 01 ;"
718           " ab  > |8 ;"
719           "  b  > k ;"
720           " 8x$ > 45 ;"
721           " 8x  > 77 ;", ""),
722
723           "ababbabxabx",
724           "018k7745");
725    expect(UnicodeString("$s = [z$] ;"
726           "$s{ab    > 01 ;"
727           "   ab    > |8 ;"
728           "    b    > k ;"
729           "   8x}$s > 45 ;"
730           "   8x    > 77 ;", ""),
731
732           "abzababbabxzabxabx",
733           "01z018k45z01x45");
734}
735
736/**
737 * Test pattern quoting and escape mechanisms.
738 */
739void TransliteratorTest::TestPatternQuoting(void) {
740    // Array of 3n items
741    // Each item is <rules>, <input>, <expected output>
742    const UnicodeString DATA[] = {
743        UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
744        UnicodeString(UChar(0x4E01)),
745        "[male adult]"
746    };
747
748    for (int32_t i=0; i<3; i+=3) {
749        logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
750        UParseError parseError;
751        UErrorCode status = U_ZERO_ERROR;
752        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
753        if (U_FAILURE(status)) {
754            errln("RBT constructor failed");
755        } else {
756            expect(*t, DATA[i+1], DATA[i+2]);
757        }
758        delete t;
759    }
760}
761
762/**
763 * Regression test for bugs found in Greek transliteration.
764 */
765void TransliteratorTest::TestJ277(void) {
766    UErrorCode status = U_ZERO_ERROR;
767    UParseError parseError;
768    Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
769    if (gl == NULL) {
770        dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
771        return;
772    }
773
774    UChar sigma = 0x3C3;
775    UChar upsilon = 0x3C5;
776    UChar nu = 0x3BD;
777//    UChar PHI = 0x3A6;
778    UChar alpha = 0x3B1;
779//    UChar omega = 0x3C9;
780//    UChar omicron = 0x3BF;
781//    UChar epsilon = 0x3B5;
782
783    // sigma upsilon nu -> syn
784    UnicodeString syn;
785    syn.append(sigma).append(upsilon).append(nu);
786    expect(*gl, syn, "syn");
787
788    // sigma alpha upsilon nu -> saun
789    UnicodeString sayn;
790    sayn.append(sigma).append(alpha).append(upsilon).append(nu);
791    expect(*gl, sayn, "saun");
792
793    // Again, using a smaller rule set
794    UnicodeString rules(
795                "$alpha   = \\u03B1;"
796                "$nu      = \\u03BD;"
797                "$sigma   = \\u03C3;"
798                "$ypsilon = \\u03C5;"
799                "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
800                "s <>           $sigma;"
801                "a <>           $alpha;"
802                "u <>  $vowel { $ypsilon;"
803                "y <>           $ypsilon;"
804                "n <>           $nu;",
805                "");
806    Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
807    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
808    expect(*mini, syn, "syn");
809    expect(*mini, sayn, "saun");
810    delete mini;
811    mini = NULL;
812
813#if !UCONFIG_NO_FORMATTING
814    // Transliterate the Greek locale data
815    Locale el("el");
816    DateFormatSymbols syms(el, status);
817    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
818    int32_t i, count;
819    const UnicodeString* data = syms.getMonths(count);
820    for (i=0; i<count; ++i) {
821        if (data[i].length() == 0) {
822            continue;
823        }
824        UnicodeString out(data[i]);
825        gl->transliterate(out);
826        UBool ok = TRUE;
827        if (data[i].length() >= 2 && out.length() >= 2 &&
828            u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
829            if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
830                ok = FALSE;
831            }
832        }
833        if (ok) {
834            logln(prettify(data[i] + " -> " + out));
835        } else {
836            errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
837        }
838    }
839#endif
840
841    delete gl;
842}
843
844/**
845 * Prefix, suffix support in hex transliterators
846 */
847void TransliteratorTest::TestJ243(void) {
848    UErrorCode ec = U_ZERO_ERROR;
849
850    // Test default Hex-Any, which should handle
851    // \u, \U, u+, and U+
852    Transliterator *hex =
853        Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
854    if (assertSuccess("getInstance", ec)) {
855        expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
856    }
857    delete hex;
858
859//    // Try a custom Hex-Unicode
860//    // \uXXXX and &#xXXXX;
861//    ec = U_ZERO_ERROR;
862//    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
863//    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
864//           "abcd5fx012&#x00033;");
865//    // Try custom Any-Hex (default is tested elsewhere)
866//    ec = U_ZERO_ERROR;
867//    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
868//    expect(hex3, "012", "&#x30;&#x31;&#x32;");
869}
870
871/**
872 * Parsers need better syntax error messages.
873 */
874void TransliteratorTest::TestJ329(void) {
875
876    struct { UBool containsErrors; const char* rule; } DATA[] = {
877        { FALSE, "a > b; c > d" },
878        { TRUE,  "a > b; no operator; c > d" },
879    };
880    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
881
882    for (int32_t i=0; i<DATA_length; ++i) {
883        UErrorCode status = U_ZERO_ERROR;
884        UParseError parseError;
885        Transliterator *rbt = Transliterator::createFromRules("<ID>",
886                                    DATA[i].rule,
887                                    UTRANS_FORWARD,
888                                    parseError,
889                                    status);
890        UBool gotError = U_FAILURE(status);
891        UnicodeString desc(DATA[i].rule);
892        desc.append(gotError ? " -> error" : " -> no error");
893        if (gotError) {
894            desc = desc + ", ParseError code=" + u_errorName(status) +
895                " line=" + parseError.line +
896                " offset=" + parseError.offset +
897                " context=" + parseError.preContext;
898        }
899        if (gotError == DATA[i].containsErrors) {
900            logln(UnicodeString("Ok:   ") + desc);
901        } else {
902            errln(UnicodeString("FAIL: ") + desc);
903        }
904        delete rbt;
905    }
906}
907
908/**
909 * Test segments and segment references.
910 */
911void TransliteratorTest::TestSegments(void) {
912    // Array of 3n items
913    // Each item is <rules>, <input>, <expected output>
914    UnicodeString DATA[] = {
915        "([a-z]) '.' ([0-9]) > $2 '-' $1",
916        "abc.123.xyz.456",
917        "ab1-c23.xy4-z56",
918
919        // nested
920        "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
921        "a1 b2",
922        "a1.a.1 b2.b.2",
923    };
924    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
925
926    for (int32_t i=0; i<DATA_length; i+=3) {
927        logln("Pattern: " + prettify(DATA[i]));
928        UParseError parseError;
929        UErrorCode status = U_ZERO_ERROR;
930        Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
931        if (U_FAILURE(status)) {
932            errln("FAIL: RBT constructor");
933        } else {
934            expect(*t, DATA[i+1], DATA[i+2]);
935        }
936        delete t;
937    }
938}
939
940/**
941 * Test cursor positioning outside of the key
942 */
943void TransliteratorTest::TestCursorOffset(void) {
944    // Array of 3n items
945    // Each item is <rules>, <input>, <expected output>
946    UnicodeString DATA[] = {
947        "pre {alpha} post > | @ ALPHA ;"
948        "eALPHA > beta ;"
949        "pre {beta} post > BETA @@ | ;"
950        "post > xyz",
951
952        "prealphapost prebetapost",
953
954        "prbetaxyz preBETApost",
955    };
956    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
957
958    for (int32_t i=0; i<DATA_length; i+=3) {
959        logln("Pattern: " + prettify(DATA[i]));
960        UParseError parseError;
961        UErrorCode status = U_ZERO_ERROR;
962        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
963        if (U_FAILURE(status)) {
964            errln("FAIL: RBT constructor");
965        } else {
966            expect(*t, DATA[i+1], DATA[i+2]);
967        }
968        delete t;
969    }
970}
971
972/**
973 * Test zero length and > 1 char length variable values.  Test
974 * use of variable refs in UnicodeSets.
975 */
976void TransliteratorTest::TestArbitraryVariableValues(void) {
977    // Array of 3n items
978    // Each item is <rules>, <input>, <expected output>
979    UnicodeString DATA[] = {
980        "$abe = ab;"
981        "$pat = x[yY]z;"
982        "$ll  = 'a-z';"
983        "$llZ = [$ll];"
984        "$llY = [$ll$pat];"
985        "$emp = ;"
986
987        "$abe > ABE;"
988        "$pat > END;"
989        "$llZ > 1;"
990        "$llY > 2;"
991        "7$emp 8 > 9;"
992        "",
993
994        "ab xYzxyz stY78",
995        "ABE ENDEND 1129",
996    };
997    int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
998
999    for (int32_t i=0; i<DATA_length; i+=3) {
1000        logln("Pattern: " + prettify(DATA[i]));
1001        UParseError parseError;
1002        UErrorCode status = U_ZERO_ERROR;
1003        Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1004        if (U_FAILURE(status)) {
1005            errln("FAIL: RBT constructor");
1006        } else {
1007            expect(*t, DATA[i+1], DATA[i+2]);
1008        }
1009        delete t;
1010    }
1011}
1012
1013/**
1014 * Confirm that the contextStart, contextLimit, start, and limit
1015 * behave correctly. J474.
1016 */
1017void TransliteratorTest::TestPositionHandling(void) {
1018    // Array of 3n items
1019    // Each item is <rules>, <input>, <expected output>
1020    const char* DATA[] = {
1021        "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1022        "xtat txtb", // pos 0,9,0,9
1023        "xTTaSS TTxUUb",
1024
1025        "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1026        "xtat txtb", // pos 2,9,3,8
1027        "xtaSS TTxUUb",
1028
1029        "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1030        "xtat txtb", // pos 3,8,3,8
1031        "xtaTT TTxTTb",
1032    };
1033
1034    // Array of 4n positions -- these go with the DATA array
1035    // They are: contextStart, contextLimit, start, limit
1036    int32_t POS[] = {
1037        0, 9, 0, 9,
1038        2, 9, 3, 8,
1039        3, 8, 3, 8,
1040    };
1041
1042    int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1043    for (int32_t i=0; i<n; i++) {
1044        UErrorCode status = U_ZERO_ERROR;
1045        UParseError parseError;
1046        Transliterator *t = Transliterator::createFromRules("<ID>",
1047                                DATA[3*i], UTRANS_FORWARD, parseError, status);
1048        if (U_FAILURE(status)) {
1049            delete t;
1050            errln("FAIL: RBT constructor");
1051            return;
1052        }
1053        UTransPosition pos;
1054        pos.contextStart= POS[4*i];
1055        pos.contextLimit = POS[4*i+1];
1056        pos.start = POS[4*i+2];
1057        pos.limit = POS[4*i+3];
1058        UnicodeString rsource(DATA[3*i+1]);
1059        t->transliterate(rsource, pos, status);
1060        if (U_FAILURE(status)) {
1061            delete t;
1062            errln("FAIL: transliterate");
1063            return;
1064        }
1065        t->finishTransliteration(rsource, pos);
1066        expectAux(DATA[3*i],
1067                  DATA[3*i+1],
1068                  rsource,
1069                  DATA[3*i+2]);
1070        delete t;
1071    }
1072}
1073
1074/**
1075 * Test the Hiragana-Katakana transliterator.
1076 */
1077void TransliteratorTest::TestHiraganaKatakana(void) {
1078    UParseError parseError;
1079    UErrorCode status = U_ZERO_ERROR;
1080    Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1081    Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1082    if (hk == 0 || kh == 0) {
1083        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1084        delete hk;
1085        delete kh;
1086        return;
1087    }
1088
1089    // Array of 3n items
1090    // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1091    const char* DATA[] = {
1092        "both",
1093        "\\u3042\\u3090\\u3099\\u3092\\u3050",
1094        "\\u30A2\\u30F8\\u30F2\\u30B0",
1095
1096        "kh",
1097        "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1098        "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1099    };
1100    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1101
1102    for (int32_t i=0; i<DATA_length; i+=3) {
1103        UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1104        UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1105        switch (*DATA[i]) {
1106        case 0x68: //'h': // Hiragana-Katakana
1107            expect(*hk, h, k);
1108            break;
1109        case 0x6B: //'k': // Katakana-Hiragana
1110            expect(*kh, k, h);
1111            break;
1112        case 0x62: //'b': // both
1113            expect(*hk, h, k);
1114            expect(*kh, k, h);
1115            break;
1116        }
1117    }
1118    delete hk;
1119    delete kh;
1120}
1121
1122/**
1123 * Test cloning / copy constructor of RBT.
1124 */
1125void TransliteratorTest::TestCopyJ476(void) {
1126    // The real test here is what happens when the destructors are
1127    // called.  So we let one object get destructed, and check to
1128    // see that its copy still works.
1129    Transliterator *t2 = 0;
1130    {
1131        UParseError parseError;
1132        UErrorCode status = U_ZERO_ERROR;
1133        Transliterator *t1 = Transliterator::createFromRules("t1",
1134            "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1135        if (U_FAILURE(status)) {
1136            errln("FAIL: RBT constructor");
1137            return;
1138        }
1139        t2 = t1->clone(); // Call copy constructor under the covers.
1140        expect(*t1, "abcfoofoo", "ABcbar");
1141        delete t1;
1142    }
1143    expect(*t2, "abcfoofoo", "ABcbar");
1144    delete t2;
1145}
1146
1147/**
1148 * Test inter-Indic transliterators.  These are composed.
1149 * ICU4C Jitterbug 483.
1150 */
1151void TransliteratorTest::TestInterIndic(void) {
1152    UnicodeString ID("Devanagari-Gujarati", "");
1153    UErrorCode status = U_ZERO_ERROR;
1154    UParseError parseError;
1155    Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1156    if (dg == 0) {
1157        dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1158        return;
1159    }
1160    UnicodeString id = dg->getID();
1161    if (id != ID) {
1162        errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1163    }
1164    UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1165    UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1166    expect(*dg, dev, guj);
1167    delete dg;
1168}
1169
1170/**
1171 * Test filter syntax in IDs. (J918)
1172 */
1173void TransliteratorTest::TestFilterIDs(void) {
1174    // Array of 3n strings:
1175    // <id>, <inverse id>, <input>, <expected output>
1176    const char* DATA[] = {
1177        "[aeiou]Any-Hex", // ID
1178        "[aeiou]Hex-Any", // expected inverse ID
1179        "quizzical",      // src
1180        "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1181
1182        "[aeiou]Any-Hex;[^5]Hex-Any",
1183        "[^5]Any-Hex;[aeiou]Hex-Any",
1184        "quizzical",
1185        "q\\u0075izzical",
1186
1187        "[abc]Null",
1188        "[abc]Null",
1189        "xyz",
1190        "xyz",
1191    };
1192    enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1193
1194    for (int i=0; i<DATA_length; i+=4) {
1195        UnicodeString ID(DATA[i], "");
1196        UnicodeString uID(DATA[i+1], "");
1197        UnicodeString data2(DATA[i+2], "");
1198        UnicodeString data3(DATA[i+3], "");
1199        UParseError parseError;
1200        UErrorCode status = U_ZERO_ERROR;
1201        Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1202        if (t == 0) {
1203            errln("FAIL: createInstance(" + ID + ") returned NULL");
1204            return;
1205        }
1206        expect(*t, data2, data3);
1207
1208        // Check the ID
1209        if (ID != t->getID()) {
1210            errln("FAIL: createInstance(" + ID + ").getID() => " +
1211                  t->getID());
1212        }
1213
1214        // Check the inverse
1215        Transliterator *u = t->createInverse(status);
1216        if (u == 0) {
1217            errln("FAIL: " + ID + ".createInverse() returned NULL");
1218        } else if (u->getID() != uID) {
1219            errln("FAIL: " + ID + ".createInverse().getID() => " +
1220                  u->getID() + ", expected " + uID);
1221        }
1222
1223        delete t;
1224        delete u;
1225    }
1226}
1227
1228/**
1229 * Test the case mapping transliterators.
1230 */
1231void TransliteratorTest::TestCaseMap(void) {
1232    UParseError parseError;
1233    UErrorCode status = U_ZERO_ERROR;
1234    Transliterator* toUpper =
1235        Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1236    Transliterator* toLower =
1237        Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1238    Transliterator* toTitle =
1239        Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1240    if (toUpper==0 || toLower==0 || toTitle==0) {
1241        errln("FAIL: createInstance returned NULL");
1242        delete toUpper;
1243        delete toLower;
1244        delete toTitle;
1245        return;
1246    }
1247
1248    expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1249           "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1250    expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1251           "the quick brown foX jumped over the lazY dogs.");
1252    expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1253           "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1254
1255    delete toUpper;
1256    delete toLower;
1257    delete toTitle;
1258}
1259
1260/**
1261 * Test the name mapping transliterators.
1262 */
1263void TransliteratorTest::TestNameMap(void) {
1264    UParseError parseError;
1265    UErrorCode status = U_ZERO_ERROR;
1266    Transliterator* uni2name =
1267        Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1268    Transliterator* name2uni =
1269        Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1270    if (uni2name==0 || name2uni==0) {
1271        errln("FAIL: createInstance returned NULL");
1272        delete uni2name;
1273        delete name2uni;
1274        return;
1275    }
1276
1277    // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1278    expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1279           CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1280    expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1281           CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1282
1283    delete uni2name;
1284    delete name2uni;
1285
1286    // round trip
1287    Transliterator* t =
1288        Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1289    if (t==0) {
1290        errln("FAIL: createInstance returned NULL");
1291        delete t;
1292        return;
1293    }
1294
1295    // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1296    UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1297    expect(*t, s, s);
1298    delete t;
1299}
1300
1301/**
1302 * Test liberalized ID syntax.  1006c
1303 */
1304void TransliteratorTest::TestLiberalizedID(void) {
1305    // Some test cases have an expected getID() value of NULL.  This
1306    // means I have disabled the test case for now.  This stuff is
1307    // still under development, and I haven't decided whether to make
1308    // getID() return canonical case yet.  It will all get rewritten
1309    // with the move to Source-Target/Variant IDs anyway. [aliu]
1310    const char* DATA[] = {
1311        "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1312        "  Null  ", "Null", "whitespace",
1313        " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1314        "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1315    };
1316    const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1317    UParseError parseError;
1318    UErrorCode status= U_ZERO_ERROR;
1319    for (int32_t i=0; i<DATA_length; i+=3) {
1320        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1321        if (t == 0) {
1322            dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1323                  " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1324        } else {
1325            UnicodeString exp;
1326            if (DATA[i+1]) {
1327                exp = UnicodeString(DATA[i+1], "");
1328            }
1329            // Don't worry about getID() if the expected char*
1330            // is NULL -- see above.
1331            if (exp.length() == 0 || exp == t->getID()) {
1332                logln(UnicodeString("Ok: ") + DATA[i+2] +
1333                      " create ID \"" + DATA[i] + "\" => \"" +
1334                      exp + "\"");
1335            } else {
1336                errln(UnicodeString("FAIL: ") + DATA[i+2] +
1337                      " create ID \"" + DATA[i] + "\" => \"" +
1338                      t->getID() + "\", exp \"" + exp + "\"");
1339            }
1340            delete t;
1341        }
1342    }
1343}
1344
1345/* test for Jitterbug 912 */
1346void TransliteratorTest::TestCreateInstance(){
1347    const char* FORWARD = "F";
1348    const char* REVERSE = "R";
1349    const char* DATA[] = {
1350        // Column 1: id
1351        // Column 2: direction
1352        // Column 3: expected ID, or "" if expect failure
1353        "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1354
1355        // JB#2689: bad compound causes crash
1356        "InvalidSource-InvalidTarget", FORWARD, "",
1357        "InvalidSource-InvalidTarget", REVERSE, "",
1358        "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1359        "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1360        "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1361        "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1362
1363        NULL
1364    };
1365
1366    for (int32_t i=0; DATA[i]; i+=3) {
1367        UParseError err;
1368        UErrorCode ec = U_ZERO_ERROR;
1369        UnicodeString id(DATA[i]);
1370        UTransDirection dir = (DATA[i+1]==FORWARD)?
1371            UTRANS_FORWARD:UTRANS_REVERSE;
1372        UnicodeString expID(DATA[i+2]);
1373        Transliterator* t =
1374            Transliterator::createInstance(id,dir,err,ec);
1375        UnicodeString newID;
1376        if (t) {
1377            newID = t->getID();
1378        }
1379        UBool ok = (newID == expID);
1380        if (!t) {
1381            newID = u_errorName(ec);
1382        }
1383        if (ok) {
1384            logln((UnicodeString)"Ok: createInstance(" +
1385                  id + "," + DATA[i+1] + ") => " + newID);
1386        } else {
1387            dataerrln((UnicodeString)"FAIL: createInstance(" +
1388                  id + "," + DATA[i+1] + ") => " + newID +
1389                  ", expected " + expID);
1390        }
1391        delete t;
1392    }
1393}
1394
1395/**
1396 * Test the normalization transliterator.
1397 */
1398void TransliteratorTest::TestNormalizationTransliterator() {
1399    // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1400    // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1401    const char* CANON[] = {
1402        // Input               Decomposed            Composed
1403        "cat",                "cat",                "cat"               ,
1404        "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1405
1406        "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1407        "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1408
1409        "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1410        "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1411        "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1412
1413        "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1414        "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1415
1416        "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1417        "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1418        "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1419
1420        "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1421        "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1422
1423        "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1424        "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1425
1426        "Henry IV",           "Henry IV",           "Henry IV"          ,
1427        "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1428
1429        "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1430        "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1431        "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1432        "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1433        "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1434
1435        "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1436        0 // end
1437    };
1438
1439    const char* COMPAT[] = {
1440        // Input               Decomposed            Composed
1441        "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1442
1443        "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1444        "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1445
1446        "Henry IV",           "Henry IV",           "Henry IV"          ,
1447        "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1448
1449        "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1450        "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1451
1452        "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1453        0 // end
1454    };
1455
1456    int32_t i;
1457    UParseError parseError;
1458    UErrorCode status = U_ZERO_ERROR;
1459    Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1460    Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1461    if (!NFD || !NFC) {
1462        dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1463        delete NFD;
1464        delete NFC;
1465        return;
1466    }
1467    for (i=0; CANON[i]; i+=3) {
1468        UnicodeString in = CharsToUnicodeString(CANON[i]);
1469        UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1470        UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1471        expect(*NFD, in, expd);
1472        expect(*NFC, in, expc);
1473    }
1474    delete NFD;
1475    delete NFC;
1476
1477    Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1478    Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1479    if (!NFKD || !NFKC) {
1480        errln("FAIL: createInstance failed");
1481        delete NFKD;
1482        delete NFKC;
1483        return;
1484    }
1485    for (i=0; COMPAT[i]; i+=3) {
1486        UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1487        UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1488        UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1489        expect(*NFKD, in, expkd);
1490        expect(*NFKC, in, expkc);
1491    }
1492    delete NFKD;
1493    delete NFKC;
1494
1495    UParseError pe;
1496    status = U_ZERO_ERROR;
1497    Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1498                                                       UTRANS_FORWARD,
1499                                                       pe, status);
1500    if (t == 0) {
1501        errln("FAIL: createInstance failed");
1502    }
1503    expect(*t, CharsToUnicodeString("\\u010dx"),
1504           CharsToUnicodeString("c\\u030C"));
1505    delete t;
1506}
1507
1508/**
1509 * Test compound RBT rules.
1510 */
1511void TransliteratorTest::TestCompoundRBT(void) {
1512    // Careful with spacing and ';' here:  Phrase this exactly
1513    // as toRules() is going to return it.  If toRules() changes
1514    // with regard to spacing or ';', then adjust this string.
1515    UnicodeString rule("::Hex-Any;\n"
1516                       "::Any-Lower;\n"
1517                       "a > '.A.';\n"
1518                       "b > '.B.';\n"
1519                       "::[^t]Any-Upper;", "");
1520    UParseError parseError;
1521    UErrorCode status = U_ZERO_ERROR;
1522    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1523    if (t == 0) {
1524        errln("FAIL: createFromRules failed");
1525        return;
1526    }
1527    expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1528           "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1529    UnicodeString r;
1530    t->toRules(r, TRUE);
1531    if (r == rule) {
1532        logln((UnicodeString)"OK: toRules() => " + r);
1533    } else {
1534        errln((UnicodeString)"FAIL: toRules() => " + r +
1535              ", expected " + rule);
1536    }
1537    delete t;
1538
1539    // Now test toRules
1540    t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1541    if (t == 0) {
1542        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1543        return;
1544    }
1545    UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1546    t->toRules(r, TRUE);
1547    if (r != exp) {
1548        errln((UnicodeString)"FAIL: toRules() => " + r +
1549              ", expected " + exp);
1550    } else {
1551        logln((UnicodeString)"OK: toRules() => " + r);
1552    }
1553    delete t;
1554
1555    // Round trip the result of toRules
1556    t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1557    if (t == 0) {
1558        errln("FAIL: createFromRules #2 failed");
1559        return;
1560    } else {
1561        logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1562    }
1563
1564    // Test toRules again
1565    t->toRules(r, TRUE);
1566    if (r != exp) {
1567        errln((UnicodeString)"FAIL: toRules() => " + r +
1568              ", expected " + exp);
1569    } else {
1570        logln((UnicodeString)"OK: toRules() => " + r);
1571    }
1572
1573    delete t;
1574
1575    // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1576    // to what the regenerated ID will look like.
1577    UnicodeString id("Upper(Lower);(NFKC)", "");
1578    t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1579    if (t == 0) {
1580        errln("FAIL: createInstance #2 failed");
1581        return;
1582    }
1583    if (t->getID() == id) {
1584        logln((UnicodeString)"OK: created " + id);
1585    } else {
1586        errln((UnicodeString)"FAIL: createInstance(" + id +
1587              ").getID() => " + t->getID());
1588    }
1589
1590    Transliterator *u = t->createInverse(status);
1591    if (u == 0) {
1592        errln("FAIL: createInverse failed");
1593        delete t;
1594        return;
1595    }
1596    exp = "NFKC();Lower(Upper)";
1597    if (u->getID() == exp) {
1598        logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1599              u->getID());
1600    } else {
1601        errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1602              u->getID());
1603    }
1604    delete t;
1605    delete u;
1606}
1607
1608/**
1609 * Compound filter semantics were orginially not implemented
1610 * correctly.  Originally, each component filter f(i) is replaced by
1611 * f'(i) = f(i) && g, where g is the filter for the compound
1612 * transliterator.
1613 *
1614 * From Mark:
1615 *
1616 * Suppose and I have a transliterator X. Internally X is
1617 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1618 *
1619 * The compound should convert all greek characters (through latin) to
1620 * cyrillic, then lowercase the result. The filter should say "don't
1621 * touch 'A' in the original". But because an intermediate result
1622 * happens to go through "A", the Greek Alpha gets hung up.
1623 */
1624void TransliteratorTest::TestCompoundFilter(void) {
1625    UParseError parseError;
1626    UErrorCode status = U_ZERO_ERROR;
1627    Transliterator *t = Transliterator::createInstance
1628        ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1629    if (t == 0) {
1630        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1631        return;
1632    }
1633    t->adoptFilter(new UnicodeSet("[^A]", status));
1634    if (U_FAILURE(status)) {
1635        errln("FAIL: UnicodeSet ct failed");
1636        delete t;
1637        return;
1638    }
1639
1640    // Only the 'A' at index 1 should remain unchanged
1641    expect(*t,
1642           CharsToUnicodeString("BA\\u039A\\u0391"),
1643           CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1644    delete t;
1645}
1646
1647void TransliteratorTest::TestRemove(void) {
1648    UParseError parseError;
1649    UErrorCode status = U_ZERO_ERROR;
1650    Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1651    if (t == 0) {
1652        errln("FAIL: createInstance failed");
1653        return;
1654    }
1655
1656    expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1657
1658    // extra test for RemoveTransliterator::clone(), which at one point wasn't
1659    // duplicating the filter
1660    Transliterator* t2 = t->clone();
1661    expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1662
1663    delete t;
1664    delete t2;
1665}
1666
1667void TransliteratorTest::TestToRules(void) {
1668    const char* RBT = "rbt";
1669    const char* SET = "set";
1670    static const char* DATA[] = {
1671        RBT,
1672        "$a=\\u4E61; [$a] > A;",
1673        "[\\u4E61] > A;",
1674
1675        RBT,
1676        "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1677        "[[:Zs:][:Zl:]]{a} > A;",
1678
1679        SET,
1680        "[[:Zs:][:Zl:]]",
1681        "[[:Zs:][:Zl:]]",
1682
1683        SET,
1684        "[:Ps:]",
1685        "[:Ps:]",
1686
1687        SET,
1688        "[:L:]",
1689        "[:L:]",
1690
1691        SET,
1692        "[[:L:]-[A]]",
1693        "[[:L:]-[A]]",
1694
1695        SET,
1696        "[~[:Lu:][:Ll:]]",
1697        "[~[:Lu:][:Ll:]]",
1698
1699        SET,
1700        "[~[a-z]]",
1701        "[~[a-z]]",
1702
1703        RBT,
1704        "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1705        "[^[:Zs:]]{a} > A;",
1706
1707        RBT,
1708        "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1709        "[[a-z]-[:Zs:]]{a} > A;",
1710
1711        RBT,
1712        "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1713        "[[:Zs:]&[a-z]]{a} > A;",
1714
1715        RBT,
1716        "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1717        "[x[:Zs:]]{a} > A;",
1718
1719        RBT,
1720        "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1721        "$macron = \\u0304 ;"
1722        "$evowel = [aeiouyAEIOUY] ;"
1723        "$iotasub = \\u0345 ;"
1724        "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1725        "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1726
1727        RBT,
1728        "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1729        "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730    };
1731    static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1732
1733    for (int32_t d=0; d < DATA_length; d+=3) {
1734        if (DATA[d] == RBT) {
1735            // Transliterator test
1736            UParseError parseError;
1737            UErrorCode status = U_ZERO_ERROR;
1738            Transliterator *t = Transliterator::createFromRules("ID",
1739                                                                UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1740            if (t == 0) {
1741                dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1742                return;
1743            }
1744            UnicodeString rules, escapedRules;
1745            t->toRules(rules, FALSE);
1746            t->toRules(escapedRules, TRUE);
1747            UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1748            UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1749            if (rules == expRules) {
1750                logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1751                      " => " + rules);
1752            } else {
1753                errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754                      " => " + rules + ", exp " + expRules);
1755            }
1756            if (escapedRules == expEscapedRules) {
1757                logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1758                      " => " + escapedRules);
1759            } else {
1760                errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761                      " => " + escapedRules + ", exp " + expEscapedRules);
1762            }
1763            delete t;
1764
1765        } else {
1766            // UnicodeSet test
1767            UErrorCode status = U_ZERO_ERROR;
1768            UnicodeString pat(DATA[d+1], -1, US_INV);
1769            UnicodeString expToPat(DATA[d+2], -1, US_INV);
1770            UnicodeSet set(pat, status);
1771            if (U_FAILURE(status)) {
1772                errln("FAIL: UnicodeSet ct failed");
1773                return;
1774            }
1775            // Adjust spacing etc. as necessary.
1776            UnicodeString toPat;
1777            set.toPattern(toPat);
1778            if (expToPat == toPat) {
1779                logln((UnicodeString)"Ok: " + pat +
1780                      " => " + toPat);
1781            } else {
1782                errln((UnicodeString)"FAIL: " + pat +
1783                      " => " + prettify(toPat, TRUE) +
1784                      ", exp " + prettify(pat, TRUE));
1785            }
1786        }
1787    }
1788}
1789
1790void TransliteratorTest::TestContext() {
1791    UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1792    expect("de > x; {d}e > y;",
1793           "de",
1794           "ye",
1795           &pos);
1796
1797    expect("ab{c} > z;",
1798           "xadabdabcy",
1799           "xadabdabzy");
1800}
1801
1802void TransliteratorTest::TestSupplemental() {
1803
1804    expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1805                                "a > $a; $s > i;"),
1806           CharsToUnicodeString("ab\\U0001030Fx"),
1807           CharsToUnicodeString("\\U00010300bix"));
1808
1809    expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1810                                "$b=[A-Z\\U00010400-\\U0001044D];"
1811                                "($a)($b) > $2 $1;"),
1812           CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1813           CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1814
1815    // k|ax\\U00010300xm
1816
1817    // k|a\\U00010400\\U00010300xm
1818    // ky|\\U00010400\\U00010300xm
1819    // ky\\U00010400|\\U00010300xm
1820
1821    // ky\\U00010400|\\U00010300\\U00010400m
1822    // ky\\U00010400y|\\U00010400m
1823    expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1824                                "$a {x} > | @ \\U00010400;"
1825                                "{$a} [^\\u0000-\\uFFFF] > y;"),
1826           CharsToUnicodeString("kax\\U00010300xm"),
1827           CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1828
1829    expectT("Any-Name",
1830           CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1831           UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1832
1833    expectT("Any-Hex/Unicode",
1834           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1835           UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1836
1837    expectT("Any-Hex/C",
1838           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1839           UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1840
1841    expectT("Any-Hex/Perl",
1842           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1843           UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1844
1845    expectT("Any-Hex/Java",
1846           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1847           UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1848
1849    expectT("Any-Hex/XML",
1850           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1851           "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1852
1853    expectT("Any-Hex/XML10",
1854           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1855           "&#66352;&#1113856;&#917601;&#160;");
1856
1857    expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1858           CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1859           CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1860}
1861
1862void TransliteratorTest::TestQuantifier() {
1863
1864    // Make sure @ in a quantified anteContext works
1865    expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1866           "AAAAAb",
1867           "aaa(aac)");
1868
1869    // Make sure @ in a quantified postContext works
1870    expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1871           "baaaaa",
1872           "caa(aaa)");
1873
1874    // Make sure @ in a quantified postContext with seg ref works
1875    expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1876           "baaaaa",
1877           "baa(aaa)");
1878
1879    // Make sure @ past ante context doesn't enter ante context
1880    UTransPosition pos = {0, 5, 3, 5};
1881    expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1882           "xxxab",
1883           "xxx(ac)",
1884           &pos);
1885
1886    // Make sure @ past post context doesn't pass limit
1887    UTransPosition pos2 = {0, 4, 0, 2};
1888    expect("{b} a+ > c @@ |; x > y; a > A;",
1889           "baxx",
1890           "caxx",
1891           &pos2);
1892
1893    // Make sure @ past post context doesn't enter post context
1894    expect("{b} a+ > c @@ |; x > y; a > A;",
1895           "baxx",
1896           "cayy");
1897
1898    expect("(ab)? c > d;",
1899           "c abc ababc",
1900           "d d abd");
1901
1902    // NOTE: The (ab)+ when referenced just yields a single "ab",
1903    // not the full sequence of them.  This accords with perl behavior.
1904    expect("(ab)+ {x} > '(' $1 ')';",
1905           "x abx ababxy",
1906           "x ab(ab) abab(ab)y");
1907
1908    expect("b+ > x;",
1909           "ac abc abbc abbbc",
1910           "ac axc axc axc");
1911
1912    expect("[abc]+ > x;",
1913           "qac abrc abbcs abtbbc",
1914           "qx xrx xs xtx");
1915
1916    expect("q{(ab)+} > x;",
1917           "qa qab qaba qababc qaba",
1918           "qa qx qxa qxc qxa");
1919
1920    expect("q(ab)* > x;",
1921           "qa qab qaba qababc",
1922           "xa x xa xc");
1923
1924    // NOTE: The (ab)+ when referenced just yields a single "ab",
1925    // not the full sequence of them.  This accords with perl behavior.
1926    expect("q(ab)* > '(' $1 ')';",
1927           "qa qab qaba qababc",
1928           "()a (ab) (ab)a (ab)c");
1929
1930    // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1931    // quoted string
1932    expect("'ab'+ > x;",
1933           "bb ab ababb",
1934           "bb x xb");
1935
1936    // $foo+ and $foo* -- the quantifier should apply to the entire
1937    // variable reference
1938    expect("$var = ab; $var+ > x;",
1939           "bb ab ababb",
1940           "bb x xb");
1941}
1942
1943class TestTrans : public Transliterator {
1944public:
1945    TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1946    }
1947    virtual Transliterator* clone(void) const {
1948        return new TestTrans(getID());
1949    }
1950    virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1951        UBool /*isIncremental*/) const
1952    {
1953        offsets.start = offsets.limit;
1954    }
1955    virtual UClassID getDynamicClassID() const;
1956    static UClassID U_EXPORT2 getStaticClassID();
1957};
1958UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1959
1960/**
1961 * Test Source-Target/Variant.
1962 */
1963void TransliteratorTest::TestSTV(void) {
1964    int32_t ns = Transliterator::countAvailableSources();
1965    if (ns < 0 || ns > 255) {
1966        errln((UnicodeString)"FAIL: Bad source count: " + ns);
1967        return;
1968    }
1969    int32_t i, j;
1970    for (i=0; i<ns; ++i) {
1971        UnicodeString source;
1972        Transliterator::getAvailableSource(i, source);
1973        logln((UnicodeString)"" + i + ": " + source);
1974        if (source.length() == 0) {
1975            errln("FAIL: empty source");
1976            continue;
1977        }
1978        int32_t nt = Transliterator::countAvailableTargets(source);
1979        if (nt < 0 || nt > 255) {
1980            errln((UnicodeString)"FAIL: Bad target count: " + nt);
1981            continue;
1982        }
1983        for (int32_t j=0; j<nt; ++j) {
1984            UnicodeString target;
1985            Transliterator::getAvailableTarget(j, source, target);
1986            logln((UnicodeString)" " + j + ": " + target);
1987            if (target.length() == 0) {
1988                errln("FAIL: empty target");
1989                continue;
1990            }
1991            int32_t nv = Transliterator::countAvailableVariants(source, target);
1992            if (nv < 0 || nv > 255) {
1993                errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1994                continue;
1995            }
1996            for (int32_t k=0; k<nv; ++k) {
1997                UnicodeString variant;
1998                Transliterator::getAvailableVariant(k, source, target, variant);
1999                if (variant.length() == 0) {
2000                    logln((UnicodeString)"  " + k + ": <empty>");
2001                } else {
2002                    logln((UnicodeString)"  " + k + ": " + variant);
2003                }
2004            }
2005        }
2006    }
2007
2008    // Test registration
2009    const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2010    const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011    const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2012    for (i=0; i<3; ++i) {
2013        Transliterator *t = new TestTrans(IDS[i]);
2014        if (t == 0) {
2015            errln("FAIL: out of memory");
2016            return;
2017        }
2018        if (t->getID() != IDS[i]) {
2019            errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2020            delete t;
2021            return;
2022        }
2023        Transliterator::registerInstance(t);
2024        UErrorCode status = U_ZERO_ERROR;
2025        t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2026        if (t == NULL) {
2027            errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2028                  IDS[i]);
2029        } else {
2030            logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2031                  IDS[i]);
2032            delete t;
2033        }
2034        Transliterator::unregister(IDS[i]);
2035        t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2036        if (t != NULL) {
2037            errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2038                  IDS[i]);
2039            delete t;
2040        }
2041    }
2042
2043    // Make sure getAvailable API reflects removal
2044    int32_t n = Transliterator::countAvailableIDs();
2045    for (i=0; i<n; ++i) {
2046        UnicodeString id = Transliterator::getAvailableID(i);
2047        for (j=0; j<3; ++j) {
2048            if (id.caseCompare(FULL_IDS[j],0)==0) {
2049                errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2050            }
2051        }
2052    }
2053    n = Transliterator::countAvailableTargets("Any");
2054    for (i=0; i<n; ++i) {
2055        UnicodeString t;
2056        Transliterator::getAvailableTarget(i, "Any", t);
2057        if (t.caseCompare(IDS[0],0)==0) {
2058            errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2059        }
2060    }
2061    n = Transliterator::countAvailableSources();
2062    for (i=0; i<n; ++i) {
2063        UnicodeString s;
2064        Transliterator::getAvailableSource(i, s);
2065        for (j=0; j<3; ++j) {
2066            if (SOURCES[j] == NULL) continue;
2067            if (s.caseCompare(SOURCES[j],0)==0) {
2068                errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2069            }
2070        }
2071    }
2072}
2073
2074/**
2075 * Test inverse of Greek-Latin; Title()
2076 */
2077void TransliteratorTest::TestCompoundInverse(void) {
2078    UParseError parseError;
2079    UErrorCode status = U_ZERO_ERROR;
2080    Transliterator *t = Transliterator::createInstance
2081        ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2082    if (t == 0) {
2083        dataerrln("FAIL: createInstance - %s", u_errorName(status));
2084        return;
2085    }
2086    UnicodeString exp("(Title);Latin-Greek");
2087    if (t->getID() == exp) {
2088        logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2089              t->getID());
2090    } else {
2091        errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2092              t->getID() + "\", expected \"" + exp + "\"");
2093    }
2094    delete t;
2095}
2096
2097/**
2098 * Test NFD chaining with RBT
2099 */
2100void TransliteratorTest::TestNFDChainRBT() {
2101    UParseError pe;
2102    UErrorCode ec = U_ZERO_ERROR;
2103    Transliterator* t = Transliterator::createFromRules(
2104                               "TEST", "::NFD; aa > Q; a > q;",
2105                               UTRANS_FORWARD, pe, ec);
2106    if (t == NULL || U_FAILURE(ec)) {
2107        dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2108        return;
2109    }
2110    expect(*t, "aa", "Q");
2111    delete t;
2112
2113    // TEMPORARY TESTS -- BEING DEBUGGED
2114//=-    UnicodeString s, s2;
2115//=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2116//=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2117//=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2118//=-    expect(*t, s, s2);
2119//=-    delete t;
2120//=-
2121//=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2122//=-    expect(*t, s2, s);
2123//=-    delete t;
2124//=-
2125//=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2126//=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2127//=-    expect(*t, s, s);
2128//=-    delete t;
2129
2130//    const char* source[] = {
2131//        /*
2132//        "\\u015Br\\u012Bmad",
2133//        "bhagavadg\\u012Bt\\u0101",
2134//        "adhy\\u0101ya",
2135//        "arjuna",
2136//        "vi\\u1E63\\u0101da",
2137//        "y\\u014Dga",
2138//        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2139//        "uv\\u0101cr\\u0325",
2140//        */
2141//        "rmk\\u1E63\\u0113t",
2142//      //"dharmak\\u1E63\\u0113tr\\u0113",
2143//        /*
2144//        "kuruk\\u1E63\\u0113tr\\u0113",
2145//        "samav\\u0113t\\u0101",
2146//        "yuyutsava-\\u1E25",
2147//        "m\\u0101mak\\u0101-\\u1E25",
2148//     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2149//        "kimakurvata",
2150//        "san\\u0304java",
2151//        */
2152//
2153//        0
2154//    };
2155//    const char* expected[] = {
2156//        /*
2157//        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2158//        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2159//        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2160//        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2161//        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2162//        "\\u092f\\u094b\\u0917",
2163//        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2164//        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2165//        */
2166//        "\\u0927",
2167//        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2168//        /*
2169//        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2170//        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2171//        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2172//        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2173//    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2174//        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2175//        "\\u0938\\u0902\\u091c\\u0935",
2176//        */
2177//        0
2178//    };
2179//    UErrorCode status = U_ZERO_ERROR;
2180//    UParseError parseError;
2181//    UnicodeString message;
2182//    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2183//    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2184//    if(U_FAILURE(status)){
2185//        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2186//        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2187//        delete latinToDevToLatin;
2188//        delete devToLatinToDev;
2189//        return;
2190//    }
2191//    UnicodeString gotResult;
2192//    for(int i= 0; source[i] != 0; i++){
2193//        gotResult = source[i];
2194//        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2195//        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2196//    }
2197//    delete latinToDevToLatin;
2198//    delete devToLatinToDev;
2199}
2200
2201/**
2202 * Inverse of "Null" should be "Null". (J21)
2203 */
2204void TransliteratorTest::TestNullInverse() {
2205    UParseError pe;
2206    UErrorCode ec = U_ZERO_ERROR;
2207    Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2208    if (t == 0 || U_FAILURE(ec)) {
2209        errln("FAIL: createInstance");
2210        return;
2211    }
2212    Transliterator *u = t->createInverse(ec);
2213    if (u == 0 || U_FAILURE(ec)) {
2214        errln("FAIL: createInverse");
2215        delete t;
2216        return;
2217    }
2218    if (u->getID() != "Null") {
2219        errln("FAIL: Inverse of Null should be Null");
2220    }
2221    delete t;
2222    delete u;
2223}
2224
2225/**
2226 * Check ID of inverse of alias. (J22)
2227 */
2228void TransliteratorTest::TestAliasInverseID() {
2229    UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2230    UParseError pe;
2231    UErrorCode ec = U_ZERO_ERROR;
2232    Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2233    if (t == 0 || U_FAILURE(ec)) {
2234        dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2235        return;
2236    }
2237    Transliterator *u = t->createInverse(ec);
2238    if (u == 0 || U_FAILURE(ec)) {
2239        errln("FAIL: createInverse");
2240        delete t;
2241        return;
2242    }
2243    UnicodeString exp = "Hangul-Latin";
2244    UnicodeString got = u->getID();
2245    if (got != exp) {
2246        errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2247              ", expected " + exp);
2248    }
2249    delete t;
2250    delete u;
2251}
2252
2253/**
2254 * Test IDs of inverses of compound transliterators. (J20)
2255 */
2256void TransliteratorTest::TestCompoundInverseID() {
2257    UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2258    UParseError pe;
2259    UErrorCode ec = U_ZERO_ERROR;
2260    Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2261    if (t == 0 || U_FAILURE(ec)) {
2262        dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2263        return;
2264    }
2265    Transliterator *u = t->createInverse(ec);
2266    if (u == 0 || U_FAILURE(ec)) {
2267        errln("FAIL: createInverse");
2268        delete t;
2269        return;
2270    }
2271    UnicodeString exp = "NFD(NFC);Jamo-Latin";
2272    UnicodeString got = u->getID();
2273    if (got != exp) {
2274        errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2275              ", expected " + exp);
2276    }
2277    delete t;
2278    delete u;
2279}
2280
2281/**
2282 * Test undefined variable.
2283
2284 */
2285void TransliteratorTest::TestUndefinedVariable() {
2286    UnicodeString rule = "$initial } a <> \\u1161;";
2287    UParseError pe;
2288    UErrorCode ec = U_ZERO_ERROR;
2289    Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2290    delete t;
2291    if (U_FAILURE(ec)) {
2292        logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2293              u_errorName(ec));
2294        return;
2295    }
2296    errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2297          u_errorName(ec));
2298}
2299
2300/**
2301 * Test empty context.
2302 */
2303void TransliteratorTest::TestEmptyContext() {
2304    expect(" { a } > b;", "xay a ", "xby b ");
2305}
2306
2307/**
2308* Test compound filter ID syntax
2309*/
2310void TransliteratorTest::TestCompoundFilterID(void) {
2311    static const char* DATA[] = {
2312        // Col. 1 = ID or rule set (latter must start with #)
2313
2314        // = columns > 1 are null if expect col. 1 to be illegal =
2315
2316        // Col. 2 = direction, "F..." or "R..."
2317        // Col. 3 = source string
2318        // Col. 4 = exp result
2319
2320        "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2321        "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2322        "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2323        "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2324        "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2325        "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2326        NULL,
2327    };
2328
2329    for (int32_t i=0; DATA[i]; i+=4) {
2330        UnicodeString id = CharsToUnicodeString(DATA[i]);
2331        UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2332            UTRANS_REVERSE : UTRANS_FORWARD;
2333        UnicodeString source;
2334        UnicodeString exp;
2335        if (DATA[i+2] != NULL) {
2336            source = CharsToUnicodeString(DATA[i+2]);
2337            exp = CharsToUnicodeString(DATA[i+3]);
2338        }
2339        UBool expOk = (DATA[i+1] != NULL);
2340        Transliterator* t = NULL;
2341        UParseError pe;
2342        UErrorCode ec = U_ZERO_ERROR;
2343        if (id.charAt(0) == 0x23/*#*/) {
2344            t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2345        } else {
2346            t = Transliterator::createInstance(id, direction, pe, ec);
2347        }
2348        UBool ok = (t != NULL && U_SUCCESS(ec));
2349        UnicodeString transID;
2350        if (t!=0) {
2351            transID = t->getID();
2352        }
2353        else {
2354            transID = UnicodeString("NULL", "");
2355        }
2356        if (ok == expOk) {
2357            logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2358                  u_errorName(ec));
2359            if (source.length() != 0) {
2360                expect(*t, source, exp);
2361            }
2362            delete t;
2363        } else {
2364            dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2365                  u_errorName(ec));
2366        }
2367    }
2368}
2369
2370/**
2371 * Test new property set syntax
2372 */
2373void TransliteratorTest::TestPropertySet() {
2374    expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2375    expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2376           "[ a stitch ]\n[ in time ]\r[ saves 9]");
2377}
2378
2379/**
2380 * Test various failure points of the new 2.0 engine.
2381 */
2382void TransliteratorTest::TestNewEngine() {
2383    UParseError pe;
2384    UErrorCode ec = U_ZERO_ERROR;
2385    Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2386    if (t == 0 || U_FAILURE(ec)) {
2387        dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2388        return;
2389    }
2390    // Katakana should be untouched
2391    expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2392           CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2393
2394    delete t;
2395
2396#if 1
2397    // This test will only work if Transliterator.ROLLBACK is
2398    // true.  Otherwise, this test will fail, revealing a
2399    // limitation of global filters in incremental mode.
2400    Transliterator *a =
2401        Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2402    Transliterator *A =
2403        Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2404    if (U_FAILURE(ec)) {
2405        delete a;
2406        delete A;
2407        return;
2408    }
2409
2410    Transliterator* array[3];
2411    array[0] = a;
2412    array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2413    array[2] = A;
2414    if (U_FAILURE(ec)) {
2415        errln("FAIL: createInstance NFD");
2416        delete a;
2417        delete A;
2418        delete array[1];
2419        return;
2420    }
2421
2422    t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2423    if (U_FAILURE(ec)) {
2424        errln("FAIL: UnicodeSet constructor");
2425        delete a;
2426        delete A;
2427        delete array[1];
2428        delete t;
2429        return;
2430    }
2431
2432    expect(*t, "aAaA", "bAbA");
2433
2434    assertTrue("countElements", t->countElements() == 3);
2435    assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2436    assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2437    assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2438    assertSuccess("getElement", ec);
2439
2440    delete a;
2441    delete A;
2442    delete array[1];
2443    delete t;
2444#endif
2445
2446    expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2447           "a",
2448           "ax");
2449
2450    UnicodeString gr = CharsToUnicodeString(
2451        "$ddot = \\u0308 ;"
2452        "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2453        "$rough = \\u0314 ;"
2454        "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2455        "\\u03b1 <> a ;"
2456        "$rough <> h ;");
2457
2458    expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2459}
2460
2461/**
2462 * Test quantified segment behavior.  We want:
2463 * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2464 */
2465void TransliteratorTest::TestQuantifiedSegment(void) {
2466    // The normal case
2467    expect("([abc]+) > x $1 x;", "cba", "xcbax");
2468
2469    // The tricky case; the quantifier is around the segment
2470    expect("([abc])+ > x $1 x;", "cba", "xax");
2471
2472    // Tricky case in reverse direction
2473    expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2474
2475    // Check post-context segment
2476    expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2477
2478    // Test toRule/toPattern for non-quantified segment.
2479    // Careful with spacing here.
2480    UnicodeString r("([a-c]){q} > x $1 x;");
2481    UParseError pe;
2482    UErrorCode ec = U_ZERO_ERROR;
2483    Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2484    if (U_FAILURE(ec)) {
2485        errln("FAIL: createFromRules");
2486        delete t;
2487        return;
2488    }
2489    UnicodeString rr;
2490    t->toRules(rr, TRUE);
2491    if (r != rr) {
2492        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2493    } else {
2494        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2495    }
2496    delete t;
2497
2498    // Test toRule/toPattern for quantified segment.
2499    // Careful with spacing here.
2500    r = "([a-c])+{q} > x $1 x;";
2501    t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2502    if (U_FAILURE(ec)) {
2503        errln("FAIL: createFromRules");
2504        delete t;
2505        return;
2506    }
2507    t->toRules(rr, TRUE);
2508    if (r != rr) {
2509        errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2510    } else {
2511        logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2512    }
2513    delete t;
2514}
2515
2516//======================================================================
2517// Ram's tests
2518//======================================================================
2519void TransliteratorTest::TestDevanagariLatinRT(){
2520    const int MAX_LEN= 52;
2521    const char* const source[MAX_LEN] = {
2522        "bh\\u0101rata",
2523        "kra",
2524        "k\\u1E63a",
2525        "khra",
2526        "gra",
2527        "\\u1E45ra",
2528        "cra",
2529        "chra",
2530        "j\\u00F1a",
2531        "jhra",
2532        "\\u00F1ra",
2533        "\\u1E6Dya",
2534        "\\u1E6Dhra",
2535        "\\u1E0Dya",
2536      //"r\\u0323ya", // \u095c is not valid in Devanagari
2537        "\\u1E0Dhya",
2538        "\\u1E5Bhra",
2539        "\\u1E47ra",
2540        "tta",
2541        "thra",
2542        "dda",
2543        "dhra",
2544        "nna",
2545        "pra",
2546        "phra",
2547        "bra",
2548        "bhra",
2549        "mra",
2550        "\\u1E49ra",
2551      //"l\\u0331ra",
2552        "yra",
2553        "\\u1E8Fra",
2554      //"l-",
2555        "vra",
2556        "\\u015Bra",
2557        "\\u1E63ra",
2558        "sra",
2559        "hma",
2560        "\\u1E6D\\u1E6Da",
2561        "\\u1E6D\\u1E6Dha",
2562        "\\u1E6Dh\\u1E6Dha",
2563        "\\u1E0D\\u1E0Da",
2564        "\\u1E0D\\u1E0Dha",
2565        "\\u1E6Dya",
2566        "\\u1E6Dhya",
2567        "\\u1E0Dya",
2568        "\\u1E0Dhya",
2569        // Not roundtrippable --
2570        // \\u0939\\u094d\\u094d\\u092E  - hma
2571        // \\u0939\\u094d\\u092E         - hma
2572        // CharsToUnicodeString("hma"),
2573        "hya",
2574        "\\u015Br\\u0325",
2575        "\\u015Bca",
2576        "\\u0115",
2577        "san\\u0304j\\u012Bb s\\u0113nagupta",
2578        "\\u0101nand vaddir\\u0101ju",
2579        "\\u0101",
2580        "a"
2581    };
2582    const char* const expected[MAX_LEN] = {
2583        "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2584        "\\u0915\\u094D\\u0930",          /* kra         */
2585        "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2586        "\\u0916\\u094D\\u0930",          /* khra        */
2587        "\\u0917\\u094D\\u0930",          /* gra         */
2588        "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2589        "\\u091A\\u094D\\u0930",          /* cra         */
2590        "\\u091B\\u094D\\u0930",          /* chra        */
2591        "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2592        "\\u091D\\u094D\\u0930",          /* jhra        */
2593        "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2594        "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2595        "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2596        "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2597      //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2598        "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2599        "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2600        "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2601        "\\u0924\\u094D\\u0924",          /* tta         */
2602        "\\u0925\\u094D\\u0930",          /* thra        */
2603        "\\u0926\\u094D\\u0926",          /* dda         */
2604        "\\u0927\\u094D\\u0930",          /* dhra        */
2605        "\\u0928\\u094D\\u0928",          /* nna         */
2606        "\\u092A\\u094D\\u0930",          /* pra         */
2607        "\\u092B\\u094D\\u0930",          /* phra        */
2608        "\\u092C\\u094D\\u0930",          /* bra         */
2609        "\\u092D\\u094D\\u0930",          /* bhra        */
2610        "\\u092E\\u094D\\u0930",          /* mra         */
2611        "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2612      //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2613        "\\u092F\\u094D\\u0930",          /* yra         */
2614        "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2615      //"l-",
2616        "\\u0935\\u094D\\u0930",          /* vra         */
2617        "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2618        "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2619        "\\u0938\\u094D\\u0930",          /* sra         */
2620        "\\u0939\\u094d\\u092E",          /* hma         */
2621        "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2622        "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2623        "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2624        "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2625        "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2626        "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2627        "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2628        "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2629        "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2630     // "hma",                         /* hma         */
2631        "\\u0939\\u094D\\u092F",          /* hya         */
2632        "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2633        "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2634        "\\u090d",                        /* e\\u0306    */
2635        "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2636        "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2637        "\\u0906",
2638        "\\u0905",
2639    };
2640    UErrorCode status = U_ZERO_ERROR;
2641    UParseError parseError;
2642    UnicodeString message;
2643    Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2644    Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2645    if(U_FAILURE(status)){
2646        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2647        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2648        return;
2649    }
2650    UnicodeString gotResult;
2651    for(int i= 0; i<MAX_LEN; i++){
2652        gotResult = source[i];
2653        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2654        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2655    }
2656    delete latinToDev;
2657    delete devToLatin;
2658}
2659
2660void TransliteratorTest::TestTeluguLatinRT(){
2661    const int MAX_LEN=10;
2662    const char* const source[MAX_LEN] = {
2663        "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2664        "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2665        "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2666        "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2667        "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2668        "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2669        "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2670        "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2671        "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2672        "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2673    };
2674
2675    const char* const expected[MAX_LEN] = {
2676        "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2677        "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2678        "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2679        "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680        "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2681        "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2682        "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2683        "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2684        "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2685        "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686    };
2687
2688    UErrorCode status = U_ZERO_ERROR;
2689    UParseError parseError;
2690    UnicodeString message;
2691    Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2692    Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2693    if(U_FAILURE(status)){
2694        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2695        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2696        return;
2697    }
2698    UnicodeString gotResult;
2699    for(int i= 0; i<MAX_LEN; i++){
2700        gotResult = source[i];
2701        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2702        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2703    }
2704    delete latinToDev;
2705    delete devToLatin;
2706}
2707
2708void TransliteratorTest::TestSanskritLatinRT(){
2709    const int MAX_LEN =16;
2710    const char* const source[MAX_LEN] = {
2711        "rmk\\u1E63\\u0113t",
2712        "\\u015Br\\u012Bmad",
2713        "bhagavadg\\u012Bt\\u0101",
2714        "adhy\\u0101ya",
2715        "arjuna",
2716        "vi\\u1E63\\u0101da",
2717        "y\\u014Dga",
2718        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2719        "uv\\u0101cr\\u0325",
2720        "dharmak\\u1E63\\u0113tr\\u0113",
2721        "kuruk\\u1E63\\u0113tr\\u0113",
2722        "samav\\u0113t\\u0101",
2723        "yuyutsava\\u1E25",
2724        "m\\u0101mak\\u0101\\u1E25",
2725    // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2726        "kimakurvata",
2727        "san\\u0304java",
2728    };
2729    const char* const expected[MAX_LEN] = {
2730        "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2731        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2732        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2733        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2734        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2735        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2736        "\\u092f\\u094b\\u0917",
2737        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2738        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2739        "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2740        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2742        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2743        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2744    //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2745        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2746        "\\u0938\\u0902\\u091c\\u0935",
2747    };
2748    UErrorCode status = U_ZERO_ERROR;
2749    UParseError parseError;
2750    UnicodeString message;
2751    Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2752    Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2753    if(U_FAILURE(status)){
2754        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2755        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2756        return;
2757    }
2758    UnicodeString gotResult;
2759    for(int i= 0; i<MAX_LEN; i++){
2760        gotResult = source[i];
2761        expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2762        expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2763    }
2764    delete latinToDev;
2765    delete devToLatin;
2766}
2767
2768
2769void TransliteratorTest::TestCompoundLatinRT(){
2770    const char* const source[] = {
2771        "rmk\\u1E63\\u0113t",
2772        "\\u015Br\\u012Bmad",
2773        "bhagavadg\\u012Bt\\u0101",
2774        "adhy\\u0101ya",
2775        "arjuna",
2776        "vi\\u1E63\\u0101da",
2777        "y\\u014Dga",
2778        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2779        "uv\\u0101cr\\u0325",
2780        "dharmak\\u1E63\\u0113tr\\u0113",
2781        "kuruk\\u1E63\\u0113tr\\u0113",
2782        "samav\\u0113t\\u0101",
2783        "yuyutsava\\u1E25",
2784        "m\\u0101mak\\u0101\\u1E25",
2785     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2786        "kimakurvata",
2787        "san\\u0304java"
2788    };
2789    const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2790    const char* const expected[MAX_LEN] = {
2791        "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2792        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2793        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2794        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2795        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2796        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2797        "\\u092f\\u094b\\u0917",
2798        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2799        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2800        "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2801        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2803        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2804        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2805    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2806        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2807        "\\u0938\\u0902\\u091c\\u0935"
2808    };
2809    if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2810        errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2811        return;
2812    }
2813
2814    UErrorCode status = U_ZERO_ERROR;
2815    UParseError parseError;
2816    UnicodeString message;
2817    Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2818    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2819    Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2820    Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2821
2822    if(U_FAILURE(status)){
2823        dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2824        dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2825        return;
2826    }
2827    UnicodeString gotResult;
2828    for(int i= 0; i<MAX_LEN; i++){
2829        gotResult = source[i];
2830        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2831        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2832        expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2833
2834    }
2835    delete(latinToDevToLatin);
2836    delete(devToLatinToDev);
2837    delete(devToTelToDev);
2838    delete(latinToTelToLatin);
2839}
2840
2841/**
2842 * Test Gurmukhi-Devanagari Tippi and Bindi
2843 */
2844void TransliteratorTest::TestGurmukhiDevanagari(){
2845    // the rule says:
2846    // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2847    // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2848    UErrorCode status = U_ZERO_ERROR;
2849    UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2850    UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2851    UParseError parseError;
2852
2853    UnicodeSetIterator vIter(vowel);
2854    UnicodeSetIterator nvIter(non_vowel);
2855    Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2856    if(U_FAILURE(status)) {
2857      dataerrln("Error creating transliterator %s", u_errorName(status));
2858      delete trans;
2859      return;
2860    }
2861    UnicodeString src (" \\u0902", -1, US_INV);
2862    UnicodeString expected(" \\u0A02", -1, US_INV);
2863    src = src.unescape();
2864    expected= expected.unescape();
2865
2866    while(vIter.next()){
2867        src.setCharAt(0,(UChar) vIter.getCodepoint());
2868        expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2869        expect(*trans,src,expected);
2870    }
2871
2872    expected.setCharAt(1,0x0A70);
2873    while(nvIter.next()){
2874        //src.setCharAt(0,(char) nvIter.codepoint);
2875        src.setCharAt(0,(UChar)nvIter.getCodepoint());
2876        expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2877        expect(*trans,src,expected);
2878    }
2879    delete trans;
2880}
2881/**
2882 * Test instantiation from a locale.
2883 */
2884void TransliteratorTest::TestLocaleInstantiation(void) {
2885    UParseError pe;
2886    UErrorCode ec = U_ZERO_ERROR;
2887    Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2888    if (U_FAILURE(ec)) {
2889        dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2890        delete t;
2891        return;
2892    }
2893    expect(*t, CharsToUnicodeString("\\u0430"), "a");
2894    delete t;
2895
2896    t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2897    if (U_FAILURE(ec)) {
2898        errln("FAIL: createInstance(en-el)");
2899        delete t;
2900        return;
2901    }
2902    expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2903    delete t;
2904}
2905
2906/**
2907 * Test title case handling of accent (should ignore accents)
2908 */
2909void TransliteratorTest::TestTitleAccents(void) {
2910    UParseError pe;
2911    UErrorCode ec = U_ZERO_ERROR;
2912    Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2913    if (U_FAILURE(ec)) {
2914        errln("FAIL: createInstance(Title)");
2915        delete t;
2916        return;
2917    }
2918    expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2919    delete t;
2920}
2921
2922/**
2923 * Basic test of a locale resource based rule.
2924 */
2925void TransliteratorTest::TestLocaleResource() {
2926    const char* DATA[] = {
2927        // id                    from               to
2928        //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2929        "Latin-el",              "b",               "\\u03bc\\u03c0",
2930        "Latin-Greek",           "b",               "\\u03B2",
2931        "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2932        "el-Latin",              "\\u03B2",         "v",
2933        "Greek-Latin",           "\\u03B2",         "b",
2934    };
2935    const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2936    for (int32_t i=0; i<DATA_length; i+=3) {
2937        UParseError pe;
2938        UErrorCode ec = U_ZERO_ERROR;
2939        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2940        if (U_FAILURE(ec)) {
2941            dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2942            delete t;
2943            continue;
2944        }
2945        expect(*t, CharsToUnicodeString(DATA[i+1]),
2946               CharsToUnicodeString(DATA[i+2]));
2947        delete t;
2948    }
2949}
2950
2951/**
2952 * Make sure parse errors reference the right line.
2953 */
2954void TransliteratorTest::TestParseError() {
2955    static const char* rule =
2956        "a > b;\n"
2957        "# more stuff\n"
2958        "d << b;";
2959    UErrorCode ec = U_ZERO_ERROR;
2960    UParseError pe;
2961    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2962    delete t;
2963    if (U_FAILURE(ec)) {
2964        UnicodeString err(pe.preContext);
2965        err.append((UChar)124/*|*/).append(pe.postContext);
2966        if (err.indexOf("d << b") >= 0) {
2967            logln("Ok: " + err);
2968        } else {
2969            errln("FAIL: " + err);
2970        }
2971    }
2972    else {
2973        errln("FAIL: no syntax error");
2974    }
2975    static const char* maskingRule =
2976        "a>x;\n"
2977        "# more stuff\n"
2978        "ab>y;";
2979    ec = U_ZERO_ERROR;
2980    delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2981    if (ec != U_RULE_MASK_ERROR) {
2982        errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2983    }
2984    else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2985        errln("FAIL: did not get expected precontext");
2986    }
2987    else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2988        errln("FAIL: did not get expected postcontext");
2989    }
2990}
2991
2992/**
2993 * Make sure sets on output are disallowed.
2994 */
2995void TransliteratorTest::TestOutputSet() {
2996    UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2997    UErrorCode ec = U_ZERO_ERROR;
2998    UParseError pe;
2999    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3000    delete t;
3001    if (U_FAILURE(ec)) {
3002        UnicodeString err(pe.preContext);
3003        err.append((UChar)124/*|*/).append(pe.postContext);
3004        logln("Ok: " + err);
3005        return;
3006    }
3007    errln("FAIL: No syntax error");
3008}
3009
3010/**
3011 * Test the use variable range pragma, making sure that use of
3012 * variable range characters is detected and flagged as an error.
3013 */
3014void TransliteratorTest::TestVariableRange() {
3015    UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3016    UErrorCode ec = U_ZERO_ERROR;
3017    UParseError pe;
3018    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3019    delete t;
3020    if (U_FAILURE(ec)) {
3021        UnicodeString err(pe.preContext);
3022        err.append((UChar)124/*|*/).append(pe.postContext);
3023        logln("Ok: " + err);
3024        return;
3025    }
3026    errln("FAIL: No syntax error");
3027}
3028
3029/**
3030 * Test invalid post context error handling
3031 */
3032void TransliteratorTest::TestInvalidPostContext() {
3033    UnicodeString rule = "a}b{c>d;";
3034    UErrorCode ec = U_ZERO_ERROR;
3035    UParseError pe;
3036    Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3037    delete t;
3038    if (U_FAILURE(ec)) {
3039        UnicodeString err(pe.preContext);
3040        err.append((UChar)124/*|*/).append(pe.postContext);
3041        if (err.indexOf("a}b{c") >= 0) {
3042            logln("Ok: " + err);
3043        } else {
3044            errln("FAIL: " + err);
3045        }
3046        return;
3047    }
3048    errln("FAIL: No syntax error");
3049}
3050
3051/**
3052 * Test ID form variants
3053 */
3054void TransliteratorTest::TestIDForms() {
3055    const char* DATA[] = {
3056        "NFC", NULL, "NFD",
3057        "nfd", NULL, "NFC", // make sure case is ignored
3058        "Any-NFKD", NULL, "Any-NFKC",
3059        "Null", NULL, "Null",
3060        "-nfkc", "nfkc", "NFKD",
3061        "-nfkc/", "nfkc", "NFKD",
3062        "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3063        "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3064        "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3065        "Source-", NULL, NULL,
3066        "Source/Variant-", NULL, NULL,
3067        "Source-/Variant", NULL, NULL,
3068        "/Variant", NULL, NULL,
3069        "/Variant-", NULL, NULL,
3070        "-/Variant", NULL, NULL,
3071        "-/", NULL, NULL,
3072        "-", NULL, NULL,
3073        "/", NULL, NULL,
3074    };
3075    const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3076
3077    for (int32_t i=0; i<DATA_length; i+=3) {
3078        const char* ID = DATA[i];
3079        const char* expID = DATA[i+1];
3080        const char* expInvID = DATA[i+2];
3081        UBool expValid = (expInvID != NULL);
3082        if (expID == NULL) {
3083            expID = ID;
3084        }
3085        UParseError pe;
3086        UErrorCode ec = U_ZERO_ERROR;
3087        Transliterator *t =
3088            Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3089        if (U_FAILURE(ec)) {
3090            if (!expValid) {
3091                logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3092            } else {
3093                dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3094            }
3095            delete t;
3096            continue;
3097        }
3098        Transliterator *u = t->createInverse(ec);
3099        if (U_FAILURE(ec)) {
3100            errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3101            delete t;
3102            delete u;
3103            continue;
3104        }
3105        if (t->getID() == expID &&
3106            u->getID() == expInvID) {
3107            logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3108        } else {
3109            errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3110                  t->getID() + " x getInverse() => " + u->getID() +
3111                  ", expected " + expInvID);
3112        }
3113        delete t;
3114        delete u;
3115    }
3116}
3117
3118static const UChar SPACE[]   = {32,0};
3119static const UChar NEWLINE[] = {10,0};
3120static const UChar RETURN[]  = {13,0};
3121static const UChar EMPTY[]   = {0};
3122
3123void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3124                                    const UnicodeString& testRulesForward) {
3125    UnicodeString rules2; t2.toRules(rules2, TRUE);
3126    //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3127    rules2.findAndReplace(SPACE, EMPTY);
3128    rules2.findAndReplace(NEWLINE, EMPTY);
3129    rules2.findAndReplace(RETURN, EMPTY);
3130
3131    UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3132
3133    if (rules2 != testRules) {
3134        errln(label);
3135        logln((UnicodeString)"GENERATED RULES: " + rules2);
3136        logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3137    }
3138}
3139
3140/**
3141 * Mark's toRules test.
3142 */
3143void TransliteratorTest::TestToRulesMark() {
3144    const char* testRules =
3145        "::[[:Latin:][:Mark:]];"
3146        "::NFKD (NFC);"
3147        "::Lower (Lower);"
3148        "a <> \\u03B1;" // alpha
3149        "::NFKC (NFD);"
3150        "::Upper (Lower);"
3151        "::Lower ();"
3152        "::([[:Greek:][:Mark:]]);"
3153        ;
3154    const char* testRulesForward =
3155        "::[[:Latin:][:Mark:]];"
3156        "::NFKD(NFC);"
3157        "::Lower(Lower);"
3158        "a > \\u03B1;"
3159        "::NFKC(NFD);"
3160        "::Upper (Lower);"
3161        "::Lower ();"
3162        ;
3163    const char* testRulesBackward =
3164        "::[[:Greek:][:Mark:]];"
3165        "::Lower (Upper);"
3166        "::NFD(NFKC);"
3167        "\\u03B1 > a;"
3168        "::Lower(Lower);"
3169        "::NFC(NFKD);"
3170        ;
3171    UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3172    UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3173
3174    UParseError pe;
3175    UErrorCode ec = U_ZERO_ERROR;
3176    Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3177    Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3178
3179    if (U_FAILURE(ec)) {
3180        delete t2;
3181        delete t3;
3182        dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3183        return;
3184    }
3185
3186    expect(*t2, source, target);
3187    expect(*t3, target, source);
3188
3189    checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3190    checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3191
3192    delete t2;
3193    delete t3;
3194}
3195
3196/**
3197 * Test Escape and Unescape transliterators.
3198 */
3199void TransliteratorTest::TestEscape() {
3200    UParseError pe;
3201    UErrorCode ec;
3202    Transliterator *t;
3203
3204    ec = U_ZERO_ERROR;
3205    t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3206    if (U_FAILURE(ec)) {
3207        errln((UnicodeString)"FAIL: createInstance");
3208    } else {
3209        expect(*t,
3210               UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3211               "@12Q");
3212    }
3213    delete t;
3214
3215    ec = U_ZERO_ERROR;
3216    t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3217    if (U_FAILURE(ec)) {
3218        errln((UnicodeString)"FAIL: createInstance");
3219    } else {
3220        expect(*t,
3221               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3222               UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3223    }
3224    delete t;
3225
3226    ec = U_ZERO_ERROR;
3227    t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3228    if (U_FAILURE(ec)) {
3229        errln((UnicodeString)"FAIL: createInstance");
3230    } else {
3231        expect(*t,
3232               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3233               UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3234    }
3235    delete t;
3236
3237    ec = U_ZERO_ERROR;
3238    t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3239    if (U_FAILURE(ec)) {
3240        errln((UnicodeString)"FAIL: createInstance");
3241    } else {
3242        expect(*t,
3243               CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3244               UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3245    }
3246    delete t;
3247}
3248
3249
3250void TransliteratorTest::TestAnchorMasking(){
3251    UnicodeString rule ("^a > Q; a > q;");
3252    UErrorCode status= U_ZERO_ERROR;
3253    UParseError parseError;
3254
3255    Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3256    if(U_FAILURE(status)){
3257        errln(UnicodeString("FAIL: ") + "ID" +
3258              ".createFromRules() => bad rules" +
3259              /*", parse error " + parseError.code +*/
3260              ", line " + parseError.line +
3261              ", offset " + parseError.offset +
3262              ", context " + prettify(parseError.preContext, TRUE) +
3263              ", rules: " + prettify(rule, TRUE));
3264    }
3265    delete t;
3266}
3267
3268/**
3269 * Make sure display names of variants look reasonable.
3270 */
3271void TransliteratorTest::TestDisplayName() {
3272#if UCONFIG_NO_FORMATTING
3273    logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3274    return;
3275#else
3276    static const char* DATA[] = {
3277        // ID, forward name, reverse name
3278        // Update the text as necessary -- the important thing is
3279        // not the text itself, but how various cases are handled.
3280
3281        // Basic test
3282        "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3283
3284        // Variants
3285        "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3286
3287        // Target-only IDs
3288        "NFC", "Any to NFC", "Any to NFD",
3289    };
3290
3291    int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3292
3293    Locale US("en", "US");
3294
3295    for (int32_t i=0; i<DATA_length; i+=3) {
3296        UnicodeString name;
3297        Transliterator::getDisplayName(DATA[i], US, name);
3298        if (name != DATA[i+1]) {
3299            dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3300                  name + ", expected " + DATA[i+1]);
3301        } else {
3302            logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3303        }
3304        UErrorCode ec = U_ZERO_ERROR;
3305        UParseError pe;
3306        Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3307        if (U_FAILURE(ec)) {
3308            delete t;
3309            dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3310            continue;
3311        }
3312        name = Transliterator::getDisplayName(t->getID(), US, name);
3313        if (name != DATA[i+2]) {
3314            dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3315                  name + ", expected " + DATA[i+2]);
3316        } else {
3317            logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3318        }
3319        delete t;
3320    }
3321#endif
3322}
3323
3324void TransliteratorTest::TestSpecialCases(void) {
3325    const UnicodeString registerRules[] = {
3326        "Any-Dev1", "x > X; y > Y;",
3327        "Any-Dev2", "XY > Z",
3328        "Greek-Latin/FAKE",
3329            CharsToUnicodeString
3330            ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3331        "" // END MARKER
3332    };
3333
3334    const UnicodeString testCases[] = {
3335        // NORMALIZATION
3336        // should add more test cases
3337        "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3338        "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339        "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340        "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341
3342        // mp -> b BUG
3343        "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3344        "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345
3346        // check for devanagari bug
3347        "nfd;Dev1;Dev2;nfc", "xy", "Z",
3348
3349        // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3350        "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3351                 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3352
3353        //TODO: enable this test once Titlecase works right
3354        /*
3355        "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356                 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3357                 */
3358        "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359                 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3360        "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3361                 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3362
3363        "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3364        "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365
3366         // FORMS OF S
3367        "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368                               CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3369        "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3370                               CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3371        "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3372                        CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3373        "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3374                        CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375        // Tatiana bug
3376        // Upper: TAT\\u02B9\\u00C2NA
3377        // Lower: tat\\u02B9\\u00E2na
3378        // Title: Tat\\u02B9\\u00E2na
3379        "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3380                 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3381        "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382                 CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383        "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384                 CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3385
3386        "" // END MARKER
3387    };
3388
3389    UParseError pos;
3390    int32_t i;
3391    for (i = 0; registerRules[i].length()!=0; i+=2) {
3392        UErrorCode status = U_ZERO_ERROR;
3393
3394        Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3395            registerRules[i+1], UTRANS_FORWARD, pos, status);
3396        if (U_FAILURE(status)) {
3397            dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3398        } else {
3399            Transliterator::registerInstance(t);
3400        }
3401    }
3402    for (i = 0; testCases[i].length()!=0; i+=3) {
3403        UErrorCode ec = U_ZERO_ERROR;
3404        UParseError pe;
3405        const UnicodeString& name = testCases[i];
3406        Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3407        if (U_FAILURE(ec)) {
3408            dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3409            delete t;
3410            continue;
3411        }
3412        const UnicodeString& id = t->getID();
3413        const UnicodeString& source = testCases[i+1];
3414        UnicodeString target;
3415
3416        // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3417
3418        if (testCases[i+2].length() > 0) {
3419            target = testCases[i+2];
3420        } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3421            Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3422        } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3423            Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3424        } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3425            Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3426        } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3427            Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3428        } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3429            target = source;
3430            target.toLower(Locale::getUS());
3431        } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3432            target = source;
3433            target.toUpper(Locale::getUS());
3434        }
3435        if (U_FAILURE(ec)) {
3436            errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3437            continue;
3438        }
3439
3440        expect(*t, source, target);
3441        delete t;
3442    }
3443    for (i = 0; registerRules[i].length()!=0; i+=2) {
3444        Transliterator::unregister(registerRules[i]);
3445    }
3446}
3447
3448char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3449    if (ch <= 0xFFFF) {
3450        sprintf(buffer, "\\u%04x", (int)ch);
3451    } else {
3452        sprintf(buffer, "\\U%08x", (int)ch);
3453    }
3454    return buffer;
3455}
3456
3457void TransliteratorTest::TestSurrogateCasing (void) {
3458    // check that casing handles surrogates
3459    // titlecase is currently defective
3460    char buffer[20];
3461    UChar buffer2[20];
3462    UChar32 dee;
3463    U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3464    UnicodeString DEE(u_totitle(dee));
3465    if (DEE != DESERET_DEE) {
3466        err("Fails titlecase of surrogates");
3467        err(Char32ToEscapedChars(dee, buffer));
3468        err(", ");
3469        errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3470    }
3471
3472    UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3473    UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3474    UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3475    UErrorCode status= U_ZERO_ERROR;
3476
3477    u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3478    if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3479        errln("Fails: Can't uppercase surrogates.");
3480    }
3481
3482    status= U_ZERO_ERROR;
3483    u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3484    if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3485        errln("Fails: Can't lowercase surrogates.");
3486    }
3487}
3488
3489static void _trans(Transliterator& t, const UnicodeString& src,
3490                   UnicodeString& result) {
3491    result = src;
3492    t.transliterate(result);
3493}
3494
3495static void _trans(const UnicodeString& id, const UnicodeString& src,
3496                   UnicodeString& result, UErrorCode ec) {
3497    UParseError pe;
3498    Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3499    if (U_SUCCESS(ec)) {
3500        _trans(*t, src, result);
3501    }
3502    delete t;
3503}
3504
3505static UnicodeString _findMatch(const UnicodeString& source,
3506                                       const UnicodeString* pairs) {
3507    UnicodeString empty;
3508    for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3509        if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3510            return pairs[i+1];
3511        }
3512    }
3513    return empty;
3514}
3515
3516// Check to see that incremental gets at least part way through a reasonable string.
3517
3518void TransliteratorTest::TestIncrementalProgress(void) {
3519    UErrorCode ec = U_ZERO_ERROR;
3520    UnicodeString latinTest = "The Quick Brown Fox.";
3521    UnicodeString devaTest;
3522    _trans("Latin-Devanagari", latinTest, devaTest, ec);
3523    UnicodeString kataTest;
3524    _trans("Latin-Katakana", latinTest, kataTest, ec);
3525    if (U_FAILURE(ec)) {
3526        errln("FAIL: Internal error");
3527        return;
3528    }
3529    const UnicodeString tests[] = {
3530        "Any", latinTest,
3531        "Latin", latinTest,
3532        "Halfwidth", latinTest,
3533        "Devanagari", devaTest,
3534        "Katakana", kataTest,
3535        "" // END MARKER
3536    };
3537
3538    UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3539    int32_t i = 0, j=0, k=0;
3540    int32_t sources = Transliterator::countAvailableSources();
3541    for (i = 0; i < sources; i++) {
3542        UnicodeString source;
3543        Transliterator::getAvailableSource(i, source);
3544        UnicodeString test = _findMatch(source, tests);
3545        if (test.length() == 0) {
3546            logln((UnicodeString)"Skipping " + source + "-X");
3547            continue;
3548        }
3549        int32_t targets = Transliterator::countAvailableTargets(source);
3550        for (j = 0; j < targets; j++) {
3551            UnicodeString target;
3552            Transliterator::getAvailableTarget(j, source, target);
3553            int32_t variants = Transliterator::countAvailableVariants(source, target);
3554            for (k =0; k< variants; k++) {
3555                UnicodeString variant;
3556                UParseError err;
3557                UErrorCode status = U_ZERO_ERROR;
3558
3559                Transliterator::getAvailableVariant(k, source, target, variant);
3560                UnicodeString id = source + "-" + target + "/" + variant;
3561
3562                Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3563                if (U_FAILURE(status)) {
3564                    dataerrln((UnicodeString)"FAIL: Could not create " + id);
3565                    delete t;
3566                    continue;
3567                }
3568                status = U_ZERO_ERROR;
3569                CheckIncrementalAux(t, test);
3570
3571                UnicodeString rev;
3572                _trans(*t, test, rev);
3573                Transliterator *inv = t->createInverse(status);
3574                if (U_FAILURE(status)) {
3575#if UCONFIG_NO_BREAK_ITERATION
3576                    // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3577                    if (id.compare((UnicodeString)"Latin-Thai/") != 0)
3578#endif
3579                        errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3580
3581                    delete t;
3582                    delete inv;
3583                    continue;
3584                }
3585                CheckIncrementalAux(inv, rev);
3586                delete t;
3587                delete inv;
3588            }
3589        }
3590    }
3591}
3592
3593void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3594                                                      const UnicodeString& input) {
3595    UErrorCode ec = U_ZERO_ERROR;
3596    UTransPosition pos;
3597    UnicodeString test = input;
3598
3599    pos.contextStart = 0;
3600    pos.contextLimit = input.length();
3601    pos.start = 0;
3602    pos.limit = input.length();
3603
3604    t->transliterate(test, pos, ec);
3605    if (U_FAILURE(ec)) {
3606        errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3607        return;
3608    }
3609    UBool gotError = FALSE;
3610    (void)gotError;    // Suppress set but not used warning.
3611
3612    // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3613
3614    if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3615        errln((UnicodeString)"No Progress, " +
3616              t->getID() + ": " + formatInput(test, input, pos));
3617        gotError = TRUE;
3618    } else {
3619        logln((UnicodeString)"PASS Progress, " +
3620              t->getID() + ": " + formatInput(test, input, pos));
3621    }
3622    t->finishTransliteration(test, pos);
3623    if (pos.start != pos.limit) {
3624        errln((UnicodeString)"Incomplete, " +
3625              t->getID() + ": " + formatInput(test, input, pos));
3626        gotError = TRUE;
3627    }
3628}
3629
3630void TransliteratorTest::TestFunction() {
3631    // Careful with spacing and ';' here:  Phrase this exactly
3632    // as toRules() is going to return it.  If toRules() changes
3633    // with regard to spacing or ';', then adjust this string.
3634    UnicodeString rule =
3635        "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3636
3637    UParseError pe;
3638    UErrorCode ec = U_ZERO_ERROR;
3639    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3640    if (t == NULL) {
3641        dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3642        return;
3643    }
3644
3645    UnicodeString r;
3646    t->toRules(r, TRUE);
3647    if (r == rule) {
3648        logln((UnicodeString)"OK: toRules() => " + r);
3649    } else {
3650        errln((UnicodeString)"FAIL: toRules() => " + r +
3651              ", expected " + rule);
3652    }
3653
3654    expect(*t, "The Quick Brown Fox",
3655           UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3656
3657    delete t;
3658}
3659
3660void TransliteratorTest::TestInvalidBackRef(void) {
3661    UnicodeString rule =  ". > $1;";
3662    UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3663    UParseError pe;
3664    UErrorCode ec = U_ZERO_ERROR;
3665    Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3666    Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3667
3668    if (t != NULL) {
3669        errln("FAIL: createFromRules should have returned NULL");
3670        delete t;
3671    }
3672
3673    if (t2 != NULL) {
3674        errln("FAIL: createFromRules should have returned NULL");
3675        delete t2;
3676    }
3677
3678    if (U_SUCCESS(ec)) {
3679        errln("FAIL: Ok: . > $1; => no error");
3680    } else {
3681        logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3682    }
3683}
3684
3685void TransliteratorTest::TestMulticharStringSet() {
3686    // Basic testing
3687    const char* rule =
3688        "       [{aa}]       > x;"
3689        "         a          > y;"
3690        "       [b{bc}]      > z;"
3691        "[{gd}] { e          > q;"
3692        "         e } [{fg}] > r;" ;
3693
3694    UParseError pe;
3695    UErrorCode ec = U_ZERO_ERROR;
3696    Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3697    if (t == NULL || U_FAILURE(ec)) {
3698        delete t;
3699        errln("FAIL: createFromRules failed");
3700        return;
3701    }
3702
3703    expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3704           "y x yz z d gd de gdq gdqfg ddrfg");
3705    delete t;
3706
3707    // Overlapped string test.  Make sure that when multiple
3708    // strings can match that the longest one is matched.
3709    rule =
3710        "    [a {ab} {abc}]    > x;"
3711        "           b          > y;"
3712        "           c          > z;"
3713        " q [t {st} {rst}] { e > p;" ;
3714
3715    t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3716    if (t == NULL || U_FAILURE(ec)) {
3717        delete t;
3718        errln("FAIL: createFromRules failed");
3719        return;
3720    }
3721
3722    expect(*t, "a ab abc qte qste qrste",
3723           "x x x qtp qstp qrstp");
3724    delete t;
3725}
3726
3727// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3728// BEGIN TestUserFunction support factory
3729
// Prototype transliterators, one per slot.  _TUFReg() fills a slot,
// _TUFFactory() clones the prototype selected by its integer token, and
// _TUFUnreg() deletes it.
Transliterator* _TUFF[4];
// Heap-allocated copies of the IDs under which the corresponding _TUFF
// slots were registered; _TUFUnreg() uses them to unregister.
UnicodeString* _TUFID[4];
3732
3733static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3734                                   Transliterator::Token context) {
3735    return _TUFF[context.integer]->clone();
3736}
3737
3738static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3739    _TUFF[n] = t;
3740    _TUFID[n] = new UnicodeString(ID);
3741    Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3742}
3743
3744static void _TUFUnreg(int32_t n) {
3745    if (_TUFF[n] != NULL) {
3746        Transliterator::unregister(*_TUFID[n]);
3747        delete _TUFF[n];
3748        delete _TUFID[n];
3749    }
3750}
3751
3752// END TestUserFunction support factory
3753// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3754
3755/**
3756 * Test that user-registered transliterators can be used under function
3757 * syntax.
3758 */
3759void TransliteratorTest::TestUserFunction() {
3760
3761    Transliterator* t;
3762    UParseError pe;
3763    UErrorCode ec = U_ZERO_ERROR;
3764
3765    // Setup our factory
3766    int32_t i;
3767    for (i=0; i<4; ++i) {
3768        _TUFF[i] = NULL;
3769    }
3770
3771    // There's no need to register inverses if we don't use them
3772    t = Transliterator::createFromRules("gif",
3773                                        UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3774                                        UTRANS_FORWARD, pe, ec);
3775    if (t == NULL || U_FAILURE(ec)) {
3776        dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3777        return;
3778    }
3779    _TUFReg("Any-gif", t, 0);
3780
3781    t = Transliterator::createFromRules("RemoveCurly",
3782                                        UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3783                                        UTRANS_FORWARD, pe, ec);
3784    if (t == NULL || U_FAILURE(ec)) {
3785        errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3786        goto FAIL;
3787    }
3788    expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3789    _TUFReg("Any-RemoveCurly", t, 1);
3790
3791    logln("Trying &hex");
3792    t = Transliterator::createFromRules("hex2",
3793                                        "(.) > &hex($1);",
3794                                        UTRANS_FORWARD, pe, ec);
3795    if (t == NULL || U_FAILURE(ec)) {
3796        errln("FAIL: createFromRules");
3797        goto FAIL;
3798    }
3799    logln("Registering");
3800    _TUFReg("Any-hex2", t, 2);
3801    t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3802    if (t == NULL || U_FAILURE(ec)) {
3803        errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3804        goto FAIL;
3805    }
3806    expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3807    delete t;
3808
3809    logln("Trying &gif");
3810    t = Transliterator::createFromRules("gif2",
3811                                        "(.) > &Gif(&Hex2($1));",
3812                                        UTRANS_FORWARD, pe, ec);
3813    if (t == NULL || U_FAILURE(ec)) {
3814        errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3815        goto FAIL;
3816    }
3817    logln("Registering");
3818    _TUFReg("Any-gif2", t, 3);
3819    t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3820    if (t == NULL || U_FAILURE(ec)) {
3821        errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3822        goto FAIL;
3823    }
3824    expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3825           "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3826    delete t;
3827
3828    // Test that filters are allowed after &
3829    t = Transliterator::createFromRules("test",
3830                                        "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3831                                        UTRANS_FORWARD, pe, ec);
3832    if (t == NULL || U_FAILURE(ec)) {
3833        errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3834        goto FAIL;
3835    }
3836    expect(*t, "abc",
3837           UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3838    delete t;
3839
3840 FAIL:
3841    for (i=0; i<4; ++i) {
3842        _TUFUnreg(i);
3843    }
3844}
3845
3846/**
3847 * Test the Any-X transliterators.
3848 */
3849void TransliteratorTest::TestAnyX(void) {
3850    UParseError parseError;
3851    UErrorCode status = U_ZERO_ERROR;
3852    Transliterator* anyLatin =
3853        Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3854    if (anyLatin==0) {
3855        dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3856        delete anyLatin;
3857        return;
3858    }
3859
3860    expect(*anyLatin,
3861           CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3862           CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3863
3864    delete anyLatin;
3865}
3866
3867/**
3868 * Test Any-X transliterators with sample letters from all scripts.
3869 */
3870void TransliteratorTest::TestAny(void) {
3871    UErrorCode status = U_ZERO_ERROR;
3872    // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3873    //       function call parameters going on in this test.
3874    UnicodeSet alphabetic("[:alphabetic:]", status);
3875    if (U_FAILURE(status)) {
3876        dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3877        return;
3878    }
3879    alphabetic.freeze();
3880
3881    UnicodeString testString;
3882    for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3883        const char *scriptName = uscript_getShortName((UScriptCode)i);
3884        if (scriptName == NULL) {
3885            errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3886            return;
3887        }
3888
3889        UnicodeSet sample;
3890        sample.applyPropertyAlias("script", scriptName, status);
3891        if (U_FAILURE(status)) {
3892            errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3893            return;
3894        }
3895        sample.retainAll(alphabetic);
3896        for (int32_t count=0; count<5; count++) {
3897            UChar32 c = sample.charAt(count);
3898            if (c == -1) {
3899                break;
3900            }
3901            testString.append(c);
3902        }
3903    }
3904
3905    UParseError parseError;
3906    Transliterator* anyLatin =
3907        Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3908    if (U_FAILURE(status)) {
3909        dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3910        return;
3911    }
3912
3913    logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3914    anyLatin->transliterate(testString);
3915    logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3916    delete anyLatin;
3917}
3918
3919
3920/**
3921 * Test the source and target set API.  These are only implemented
3922 * for RBT and CompoundTransliterator at this time.
3923 */
3924void TransliteratorTest::TestSourceTargetSet() {
3925    UErrorCode ec = U_ZERO_ERROR;
3926
3927    // Rules
3928    const char* r =
3929        "a > b; "
3930        "r [x{lu}] > q;";
3931
3932    // Expected source
3933    UnicodeSet expSrc("[arx{lu}]", ec);
3934
3935    // Expected target
3936    UnicodeSet expTrg("[bq]", ec);
3937
3938    UParseError pe;
3939    Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3940
3941    if (U_FAILURE(ec)) {
3942        delete t;
3943        errln("FAIL: Couldn't set up test");
3944        return;
3945    }
3946
3947    UnicodeSet src; t->getSourceSet(src);
3948    UnicodeSet trg; t->getTargetSet(trg);
3949
3950    if (src == expSrc && trg == expTrg) {
3951        UnicodeString a, b;
3952        logln((UnicodeString)"Ok: " +
3953              r + " => source = " + src.toPattern(a, TRUE) +
3954              ", target = " + trg.toPattern(b, TRUE));
3955    } else {
3956        UnicodeString a, b, c, d;
3957        errln((UnicodeString)"FAIL: " +
3958              r + " => source = " + src.toPattern(a, TRUE) +
3959              ", expected " + expSrc.toPattern(b, TRUE) +
3960              "; target = " + trg.toPattern(c, TRUE) +
3961              ", expected " + expTrg.toPattern(d, TRUE));
3962    }
3963
3964    delete t;
3965}
3966
3967/**
3968 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3969 */
3970void TransliteratorTest::TestPatternWhiteSpace() {
3971    // Rules
3972    const char* r = "a > \\u200E b;";
3973
3974    UErrorCode ec = U_ZERO_ERROR;
3975    UParseError pe;
3976    Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3977
3978    if (U_FAILURE(ec)) {
3979        errln("FAIL: Couldn't set up test");
3980    } else {
3981        expect(*t, "a", "b");
3982    }
3983    delete t;
3984
3985    // UnicodeSet
3986    ec = U_ZERO_ERROR;
3987    UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3988
3989    if (U_FAILURE(ec)) {
3990        errln("FAIL: Couldn't set up test");
3991    } else {
3992        if (set.contains(0x200E)) {
3993            errln("FAIL: U+200E not being ignored by UnicodeSet");
3994        }
3995    }
3996}
3997//======================================================================
3998// this method is in TestUScript.java
3999//======================================================================
4000void TransliteratorTest::TestAllCodepoints(){
4001    UScriptCode code= USCRIPT_INVALID_CODE;
4002    char id[256]={'\0'};
4003    char abbr[256]={'\0'};
4004    char newId[256]={'\0'};
4005    char newAbbrId[256]={'\0'};
4006    char oldId[256]={'\0'};
4007    char oldAbbrId[256]={'\0'};
4008
4009    UErrorCode status =U_ZERO_ERROR;
4010    UParseError pe;
4011
4012    for(uint32_t i = 0; i<=0x10ffff; i++){
4013        code =  uscript_getScript(i,&status);
4014        if(code == USCRIPT_INVALID_CODE){
4015            dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4016        }
4017        const char* myId = uscript_getName(code);
4018        if(!myId) {
4019          dataerrln("Valid script code returned NULL name. Check your data!");
4020          return;
4021        }
4022        uprv_strcpy(id,myId);
4023        uprv_strcpy(abbr,uscript_getShortName(code));
4024
4025        uprv_strcpy(newId,"[:");
4026        uprv_strcat(newId,id);
4027        uprv_strcat(newId,":];NFD");
4028
4029        uprv_strcpy(newAbbrId,"[:");
4030        uprv_strcat(newAbbrId,abbr);
4031        uprv_strcat(newAbbrId,":];NFD");
4032
4033        if(uprv_strcmp(newId,oldId)!=0){
4034            Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4035            if(t==NULL || U_FAILURE(status)){
4036                dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4037            }
4038            delete t;
4039        }
4040        if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4041            Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4042            if(t==NULL || U_FAILURE(status)){
4043                dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4044            }
4045            delete t;
4046        }
4047        uprv_strcpy(oldId,newId);
4048        uprv_strcpy(oldAbbrId, newAbbrId);
4049
4050    }
4051
4052}
4053
/*
 * TEST_TRANSLIT_ID(id, cls)
 *
 * Creates the forward transliterator for `id` and checks that its dynamic
 * class ID equals cls::getStaticClassID().  A creation failure is reported
 * with dataerrln(), a class ID mismatch with errln().  The instance is
 * deleted at the end in either case.
 *
 * `id` is also passed to a "%s" format, so it has to be a C string; `cls`
 * is a class name (it is stringized into the failure message).  The macro
 * calls dataerrln()/errln() unqualified, so it must be expanded where those
 * are in scope.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_FAILURE(ec)) { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4067
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 *
 * Same check as TEST_TRANSLIT_ID, but the transliterator is built from
 * rules (under the ID "_") instead of being looked up by ID, and a creation
 * failure is reported with errln().
 *
 * `rule` must be a string literal: the failure message is formed by
 * placing it directly after another literal.  `cls` is a class name.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create " rule); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4082
/**
 * Class-ID boilerplate check.  Each line creates one transliterator, either
 * by ID or (last line) from a rule string, and verifies that the object's
 * dynamic class ID matches the static class ID of the class named in the
 * second macro argument.
 */
void TransliteratorTest::TestBoilerplate() {
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Rule-based case: the transliterator is built from rule text, not looked up by ID.
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4098
4099void TransliteratorTest::TestAlternateSyntax() {
4100    // U+2206 == &
4101    // U+2190 == <
4102    // U+2192 == >
4103    // U+2194 == <>
4104    expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4105           "abc",
4106           "xbz");
4107    expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4108           CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4109           UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4110}
4111
// Rule sets shared by TestBeginEnd() and TestBeginEndToRules().  They are
// referenced by index from BEGIN_END_TEST_CASES, and entry [17] is also
// instantiated directly in the reverse direction.
//
// Every entry that depended on the ::BEGIN/::END syntax is commented out and
// replaced by an empty-string placeholder so that the indexes of the
// remaining entries stay the same.  Each entry is one string built from
// adjacent literals, one rule per literal.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17] -- the set the tests also build with UTRANS_REVERSE
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4285
4286/*
4287(This entire test is commented out below and will need some heavy revision when we re-add
4288the ::BEGIN/::END stuff)
4289static const char* BOGUS_BEGIN_END_RULES[] = {
4290    // [7]
4291    "::BEGIN;"
4292    "abc > xy;"
4293    "::BEGIN;"
4294    "aba > z;"
4295    "::END;"
4296    "::END;",
4297
4298    // [8]
4299    "abc > xy;"
4300    " aba > z;"
4301    "::END;",
4302
4303    // [9]
4304    "::BEGIN;"
4305    "::Upper;"
4306    "::END;"
4307};
4308static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4309*/
4310
// Flat table of test cases consumed three slots at a time by TestBeginEnd()
// and TestBeginEndToRules(): { rule set, input text, expected output }.
// Rows whose rule set is commented out in BEGIN_END_RULES are commented out
// here as well.
static const char* BEGIN_END_TEST_CASES[] = {
    // rules             input                   expected output
    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",

    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",

//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Number of slots (not rows) in the table; the tests step through it by 3.
static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4339
4340void TransliteratorTest::TestBeginEnd() {
4341    // run through the list of test cases above
4342    int32_t i = 0;
4343    for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4344        expect((UnicodeString)"Test case #" + (i / 3),
4345               UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4346               UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4347               UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4348    }
4349
4350    // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4351    UParseError parseError;
4352    UErrorCode status = U_ZERO_ERROR;
4353    Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4354            UTRANS_REVERSE, parseError, status);
4355    if (reversed == 0 || U_FAILURE(status)) {
4356        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4357    } else {
4358        expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4359    }
4360    delete reversed;
4361
4362    // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4363    // that all of them cause errors
4364/*
4365(commented out until we have the real ::BEGIN/::END stuff in place
4366    for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4367        UParseError parseError;
4368        UErrorCode status = U_ZERO_ERROR;
4369        Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4370                UTRANS_FORWARD, parseError, status);
4371        if (!U_FAILURE(status)) {
4372            delete t;
4373            errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4374        }
4375    }
4376*/
4377}
4378
4379void TransliteratorTest::TestBeginEndToRules() {
4380    // run through the same list of test cases we used above, but this time, instead of just
4381    // instantiating a Transliterator from the rules and running the test against it, we instantiate
4382    // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4383    // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4384    // to (i.e., does the same thing as) the original rule set
4385    for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4386        UParseError parseError;
4387        UErrorCode status = U_ZERO_ERROR;
4388        Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4389                UTRANS_FORWARD, parseError, status);
4390        if (U_FAILURE(status)) {
4391            reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4392        } else {
4393            UnicodeString rules;
4394            t->toRules(rules, TRUE);
4395            Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4396                    UTRANS_FORWARD, parseError, status);
4397            if (U_FAILURE(status)) {
4398                reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4399                        parseError, status);
4400                delete t;
4401            } else {
4402                expect(*t2,
4403                       UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4404                       UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4405                delete t;
4406                delete t2;
4407            }
4408        }
4409    }
4410
4411    // do the same thing for the reversible test case
4412    UParseError parseError;
4413    UErrorCode status = U_ZERO_ERROR;
4414    Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4415            UTRANS_REVERSE, parseError, status);
4416    if (U_FAILURE(status)) {
4417        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4418    } else {
4419        UnicodeString rules;
4420        reversed->toRules(rules, FALSE);
4421        Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4422                parseError, status);
4423        if (U_FAILURE(status)) {
4424            reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4425                    parseError, status);
4426            delete reversed;
4427        } else {
4428            expect(*reversed2,
4429                   UnicodeString("xy XY XYZ yz YZ"),
4430                   UnicodeString("xy abc xaba yz aba"));
4431            delete reversed;
4432            delete reversed2;
4433        }
4434    }
4435}
4436
4437void TransliteratorTest::TestRegisterAlias() {
4438    UnicodeString longID("Lower;[aeiou]Upper");
4439    UnicodeString shortID("Any-CapVowels");
4440    UnicodeString reallyShortID("CapVowels");
4441
4442    Transliterator::registerAlias(shortID, longID);
4443
4444    UErrorCode err = U_ZERO_ERROR;
4445    Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4446    if (U_FAILURE(err)) {
4447        errln("Failed to instantiate transliterator with long ID");
4448        Transliterator::unregister(shortID);
4449        return;
4450    }
4451    Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4452    if (U_FAILURE(err)) {
4453        errln("Failed to instantiate transliterator with short ID");
4454        delete t1;
4455        Transliterator::unregister(shortID);
4456        return;
4457    }
4458
4459    if (t1->getID() != longID)
4460        errln("Transliterator instantiated with long ID doesn't have long ID");
4461    if (t2->getID() != reallyShortID)
4462        errln("Transliterator instantiated with short ID doesn't have short ID");
4463
4464    UnicodeString rules1;
4465    UnicodeString rules2;
4466
4467    t1->toRules(rules1, TRUE);
4468    t2->toRules(rules2, TRUE);
4469    if (rules1 != rules2)
4470        errln("Alias transliterators aren't the same");
4471
4472    delete t1;
4473    delete t2;
4474    Transliterator::unregister(shortID);
4475
4476    t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4477    if (U_SUCCESS(err)) {
4478        errln("Instantiation with short ID succeeded after short ID was unregistered");
4479        delete t1;
4480    }
4481
4482    // try the same thing again, but this time with something other than
4483    // an instance of CompoundTransliterator
4484    UnicodeString realID("Latin-Greek");
4485    UnicodeString fakeID("Latin-dlgkjdflkjdl");
4486    Transliterator::registerAlias(fakeID, realID);
4487
4488    err = U_ZERO_ERROR;
4489    t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4490    if (U_FAILURE(err)) {
4491        dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4492        Transliterator::unregister(realID);
4493        return;
4494    }
4495    t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4496    if (U_FAILURE(err)) {
4497        errln("Failed to instantiate transliterator with fake ID");
4498        delete t1;
4499        Transliterator::unregister(realID);
4500        return;
4501    }
4502
4503    t1->toRules(rules1, TRUE);
4504    t2->toRules(rules2, TRUE);
4505    if (rules1 != rules2)
4506        errln("Alias transliterators aren't the same");
4507
4508    delete t1;
4509    delete t2;
4510    Transliterator::unregister(fakeID);
4511}
4512
4513void TransliteratorTest::TestRuleStripping() {
4514    /*
4515#
4516\uE001>\u0C01; # SIGN
4517    */
4518    static const UChar rule[] = {
4519        0x0023,0x0020,0x000D,0x000A,
4520        0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4521    };
4522    static const UChar expectedRule[] = {
4523        0xE001,0x003E,0x0C01,0x003B,0
4524    };
4525    UChar result[sizeof(rule)/sizeof(rule[0])];
4526    UErrorCode status = U_ZERO_ERROR;
4527    int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4528    if (len != u_strlen(expectedRule)) {
4529        errln("utrans_stripRules return len = %d", len);
4530    }
4531    if (u_strncmp(expectedRule, result, len) != 0) {
4532        errln("utrans_stripRules did not return expected string");
4533    }
4534}
4535
4536/**
4537 * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4538 */
4539void TransliteratorTest::TestHalfwidthFullwidth(void) {
4540    UParseError parseError;
4541    UErrorCode status = U_ZERO_ERROR;
4542    Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4543    Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4544    if (hf == 0 || fh == 0) {
4545        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4546        delete hf;
4547        delete fh;
4548        return;
4549    }
4550
4551    // Array of 2n items
4552    // Each item is
4553    //   "hf"|"fh"|"both",
4554    //   <Halfwidth>,
4555    //   <Fullwidth>
4556    const char* DATA[] = {
4557        "both",
4558        "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4559        "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4560    };
4561    int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4562
4563    for (int32_t i=0; i<DATA_length; i+=3) {
4564        UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4565        UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4566        switch (*DATA[i]) {
4567        case 0x68: //'h': // Halfwidth-Fullwidth only
4568            expect(*hf, h, f);
4569            break;
4570        case 0x66: //'f': // Fullwidth-Halfwidth only
4571            expect(*fh, f, h);
4572            break;
4573        case 0x62: //'b': // both directions
4574            expect(*hf, h, f);
4575            expect(*fh, f, h);
4576            break;
4577        }
4578    }
4579    delete hf;
4580    delete fh;
4581}
4582
4583
4584    /**
4585     *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4586     *              TODO: confirm that the expected results are correct.
4587     *              For now, test just confirms that C++ and Java give identical results.
4588     */
4589void TransliteratorTest::TestThai(void) {
4590#if !UCONFIG_NO_BREAK_ITERATION
4591    UParseError parseError;
4592    UErrorCode status = U_ZERO_ERROR;
4593    Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4594    if (tr == 0) {
4595        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4596        return;
4597    }
4598    if (U_FAILURE(status)) {
4599        errln("FAIL: createInstance failed with %s", u_errorName(status));
4600        return;
4601    }
4602    const char *thaiText =
4603        "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4604        "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4605        "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4606        "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4607        "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4608        "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4609        "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4610        "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4611        "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4612        "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4613        "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4614        "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4615        "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4616        "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4617        "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4618        "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4619        "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4620        "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4621        "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4622        "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4623        "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4624        "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4625        "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4626        "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4627        " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4628        "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4629        "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4630        " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4631        "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4632        "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4633
4634    const char *latinText =
4635        "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4636        "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4637        "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4638        "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4639        "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4640        " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4641        "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4642        "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4643        "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4644        "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4645        "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4646        "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4647        " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4648        "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4649        " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4650        "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4651        "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4652        "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4653
4654
4655    UnicodeString  xlitText(thaiText);
4656    xlitText = xlitText.unescape();
4657    tr->transliterate(xlitText);
4658
4659    UnicodeString expectedText(latinText);
4660    expectedText = expectedText.unescape();
4661    expect(*tr, xlitText, expectedText);
4662
4663    delete tr;
4664#endif
4665}
4666
4667
4668//======================================================================
4669// Support methods
4670//======================================================================
4671void TransliteratorTest::expectT(const UnicodeString& id,
4672                                 const UnicodeString& source,
4673                                 const UnicodeString& expectedResult) {
4674    UErrorCode ec = U_ZERO_ERROR;
4675    UParseError pe;
4676    Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4677    if (U_FAILURE(ec)) {
4678        errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4679        delete t;
4680        return;
4681    }
4682    expect(*t, source, expectedResult);
4683    delete t;
4684}
4685
4686void TransliteratorTest::reportParseError(const UnicodeString& message,
4687                                          const UParseError& parseError,
4688                                          const UErrorCode& status) {
4689    dataerrln(message +
4690          /*", parse error " + parseError.code +*/
4691          ", line " + parseError.line +
4692          ", offset " + parseError.offset +
4693          ", pre-context " + prettify(parseError.preContext, TRUE) +
4694          ", post-context " + prettify(parseError.postContext,TRUE) +
4695          ", Error: " + u_errorName(status));
4696}
4697
4698void TransliteratorTest::expect(const UnicodeString& rules,
4699                                const UnicodeString& source,
4700                                const UnicodeString& expectedResult,
4701                                UTransPosition *pos) {
4702    expect("<ID>", rules, source, expectedResult, pos);
4703}
4704
4705void TransliteratorTest::expect(const UnicodeString& id,
4706                                const UnicodeString& rules,
4707                                const UnicodeString& source,
4708                                const UnicodeString& expectedResult,
4709                                UTransPosition *pos) {
4710    UErrorCode status = U_ZERO_ERROR;
4711    UParseError parseError;
4712    Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4713    if (U_FAILURE(status)) {
4714        reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4715    } else {
4716        expect(*t, source, expectedResult, pos);
4717    }
4718    delete t;
4719}
4720
/**
 * Round-trip check using two transliterators.
 *
 * @param t                      must turn source into expectedResult
 * @param source                 input for t, and expected output of reverseTransliterator
 * @param expectedResult         expected output of t, and input for reverseTransliterator
 * @param reverseTransliterator  must turn expectedResult back into source
 */
void TransliteratorTest::expect(const Transliterator& t,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                const Transliterator& reverseTransliterator) {
    // Forward leg first, then the return leg with the arguments swapped.
    expect(t, source, expectedResult);
    expect(reverseTransliterator, expectedResult, source);
}
4728
4729void TransliteratorTest::expect(const Transliterator& t,
4730                                const UnicodeString& source,
4731                                const UnicodeString& expectedResult,
4732                                UTransPosition *pos) {
4733    if (pos == 0) {
4734        UnicodeString result(source);
4735        t.transliterate(result);
4736        expectAux(t.getID() + ":String", source, result, expectedResult);
4737    }
4738    UTransPosition index={0, 0, 0, 0};
4739    if (pos != 0) {
4740        index = *pos;
4741    }
4742
4743    UnicodeString rsource(source);
4744    if (pos == 0) {
4745        t.transliterate(rsource);
4746    } else {
4747        // Do it all at once -- below we do it incrementally
4748        t.finishTransliteration(rsource, *pos);
4749    }
4750    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4751
4752    // Test keyboard (incremental) transliteration -- this result
4753    // must be the same after we finalize (see below).
4754    UnicodeString log;
4755    rsource.remove();
4756    if (pos != 0) {
4757        rsource = source;
4758        formatInput(log, rsource, index);
4759        log.append(" -> ");
4760        UErrorCode status = U_ZERO_ERROR;
4761        t.transliterate(rsource, index, status);
4762        formatInput(log, rsource, index);
4763    } else {
4764        for (int32_t i=0; i<source.length(); ++i) {
4765            if (i != 0) {
4766                log.append(" + ");
4767            }
4768            log.append(source.charAt(i)).append(" -> ");
4769            UErrorCode status = U_ZERO_ERROR;
4770            t.transliterate(rsource, index, source.charAt(i), status);
4771            formatInput(log, rsource, index);
4772        }
4773    }
4774
4775    // As a final step in keyboard transliteration, we must call
4776    // transliterate to finish off any pending partial matches that
4777    // were waiting for more input.
4778    t.finishTransliteration(rsource, index);
4779    log.append(" => ").append(rsource);
4780
4781    expectAux(t.getID() + ":Keyboard", log,
4782              rsource == expectedResult,
4783              expectedResult);
4784}
4785
4786
4787/**
4788 * @param appendTo result is appended to this param.
4789 * @param input the string being transliterated
4790 * @param pos the index struct
4791 */
4792UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4793                                               const UnicodeString& input,
4794                                               const UTransPosition& pos) {
4795    // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4796    // the {} indicate the context start and limit, and the ||
4797    // indicate the start and limit.
4798    if (0 <= pos.contextStart &&
4799        pos.contextStart <= pos.start &&
4800        pos.start <= pos.limit &&
4801        pos.limit <= pos.contextLimit &&
4802        pos.contextLimit <= input.length()) {
4803
4804        UnicodeString a, b, c, d, e;
4805        input.extractBetween(0, pos.contextStart, a);
4806        input.extractBetween(pos.contextStart, pos.start, b);
4807        input.extractBetween(pos.start, pos.limit, c);
4808        input.extractBetween(pos.limit, pos.contextLimit, d);
4809        input.extractBetween(pos.contextLimit, input.length(), e);
4810        appendTo.append(a).append((UChar)123/*{*/).append(b).
4811            append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4812            append((UChar)125/*}*/).append(e);
4813    } else {
4814        appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4815                        pos.contextStart + ", s=" + pos.start + ", l=" +
4816                        pos.limit + ", cl=" + pos.contextLimit + "} on " +
4817                        input);
4818    }
4819    return appendTo;
4820}
4821
4822void TransliteratorTest::expectAux(const UnicodeString& tag,
4823                                   const UnicodeString& source,
4824                                   const UnicodeString& result,
4825                                   const UnicodeString& expectedResult) {
4826    expectAux(tag, source + " -> " + result,
4827              result == expectedResult,
4828              expectedResult);
4829}
4830
4831void TransliteratorTest::expectAux(const UnicodeString& tag,
4832                                   const UnicodeString& summary, UBool pass,
4833                                   const UnicodeString& expectedResult) {
4834    if (pass) {
4835        logln(UnicodeString("(")+tag+") " + prettify(summary));
4836    } else {
4837        dataerrln(UnicodeString("FAIL: (")+tag+") "
4838              + prettify(summary)
4839              + ", expected " + prettify(expectedResult));
4840    }
4841}
4842
4843#endif /* #if !UCONFIG_NO_TRANSLITERATION */
4844