1/*
2**********************************************************************
3*   Copyright (C) 2000-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   05/23/00    aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "unicode/translit.h"
16#include "rbt.h"
17#include "unicode/calendar.h"
18#include "unicode/uniset.h"
19#include "unicode/uchar.h"
20#include "unicode/normlzr.h"
21#include "unicode/uchar.h"
22#include "unicode/parseerr.h"
23#include "unicode/usetiter.h"
24#include "unicode/putil.h"
25#include "unicode/uversion.h"
26#include "unicode/locid.h"
27#include "unicode/ulocdata.h"
28#include "unicode/utf8.h"
29#include "unicode/utf16.h"
30#include "putilimp.h"
31#include "cmemory.h"
32#include "transrt.h"
33#include "testutil.h"
34#include <string.h>
35#include <stdio.h>
36
// Dispatch helper for runIndexedTest(): expands to one `case id:` arm that
// stores the stringified test name in `name` and, when `exec` is set, logs a
// banner, runs test(), and logs how long the call took.
// The elapsed value is the difference of two uprv_getUTCtime() readings
// (milliseconds), so it is divided by U_MILLIS_PER_SECOND to match the
// " seconds" label in the log line.
#define CASE(id,test) case id:                          \
                          name = #test;                 \
                          if (exec) {                   \
                              logln(#test "---");       \
                              logln((UnicodeString)""); \
                              UDate t = uprv_getUTCtime(); \
                              test();                   \
                              t = uprv_getUTCtime() - t; \
                              logln((UnicodeString)#test " took " + t/U_MILLIS_PER_SECOND + " seconds"); \
                          }                             \
                          break
48
// Variant of CASE for long-running tests. In quick mode the arm only sets
// `name` to ""; otherwise it stores the stringified test name and, when
// `exec` is set, logs a banner and runs test().
#define EXHAUSTIVE(id,test) case id:                            \
                              if (quick) {                      \
                                  name = "";                    \
                              } else {                          \
                                  name = #test;                 \
                                  if (exec) {                   \
                                      logln(#test "---");       \
                                      logln((UnicodeString)""); \
                                      test();                   \
                                  }                             \
                              }                                 \
                              break
61void
62TransliteratorRoundTripTest::runIndexedTest(int32_t index, UBool exec,
63                                   const char* &name, char* /*par*/) {
64    switch (index) {
65        CASE(0, TestCyrillic);
66        // CASE(0,TestKana);
67        CASE(1,TestHiragana);
68        CASE(2,TestKatakana);
69        CASE(3,TestJamo);
70        CASE(4,TestHangul);
71        CASE(5,TestGreek);
72        CASE(6,TestGreekUNGEGN);
73        CASE(7,Testel);
74        CASE(8,TestDevanagariLatin);
75        CASE(9,TestInterIndic);
76        CASE(10, TestHebrew);
77        CASE(11, TestArabic);
78        CASE(12, TestHan);
79        default: name = ""; break;
80    }
81}
82
83
84//--------------------------------------------------------------------
85// TransliteratorPointer
86//--------------------------------------------------------------------
87
88/**
89 * A transliterator pointer wrapper that deletes the contained
90 * pointer automatically when the wrapper goes out of scope.
91 * Sometimes called a "janitor" or "smart pointer".
92 */
93class TransliteratorPointer {
94    Transliterator* t;
95    // disallowed:
96    TransliteratorPointer(const TransliteratorPointer& rhs);
97    TransliteratorPointer& operator=(const TransliteratorPointer& rhs);
98public:
99    TransliteratorPointer(Transliterator* adopted) {
100        t = adopted;
101    }
102    ~TransliteratorPointer() {
103        delete t;
104    }
105    inline Transliterator* operator->() { return t; }
106    inline operator const Transliterator*() const { return t; }
107    inline operator Transliterator*() { return t; }
108};
109
110//--------------------------------------------------------------------
111// Legal
112//--------------------------------------------------------------------
113
114class Legal {
115public:
116    Legal() {}
117    virtual ~Legal() {}
118    virtual UBool is(const UnicodeString& /*sourceString*/) const {return TRUE;}
119};
120
121class LegalJamo : public Legal {
122    // any initial must be followed by a medial (or initial)
123    // any medial must follow an initial (or medial)
124    // any final must follow a medial (or final)
125public:
126    LegalJamo() {}
127    virtual ~LegalJamo() {}
128    virtual UBool is(const UnicodeString& sourceString) const;
129            int   getType(UChar c) const;
130};
131
132UBool LegalJamo::is(const UnicodeString& sourceString) const {
133    int t;
134    UnicodeString decomp;
135    UErrorCode ec = U_ZERO_ERROR;
136    Normalizer::decompose(sourceString, FALSE, 0, decomp, ec);
137    if (U_FAILURE(ec)) {
138        return FALSE;
139    }
140    for (int i = 0; i < decomp.length(); ++i) { // don't worry about surrogates
141        switch (getType(decomp.charAt(i))) {
142        case 0: t = getType(decomp.charAt(i+1));
143                if (t != 0 && t != 1) { return FALSE; }
144                break;
145        case 1: t = getType(decomp.charAt(i-1));
146                if (t != 0 && t != 1) { return FALSE; }
147                break;
148        case 2: t = getType(decomp.charAt(i-1));
149                if (t != 1 && t != 2) { return FALSE; }
150                break;
151        }
152    }
153    return TRUE;
154}
155
156int LegalJamo::getType(UChar c) const {
157    if (0x1100 <= c && c <= 0x1112)
158        return 0;
159    else if (0x1161 <= c && c  <= 0x1175)
160             return 1;
161         else if (0x11A8 <= c && c  <= 0x11C2)
162                  return 2;
163    return -1; // other
164}
165
166class LegalGreek : public Legal {
167    UBool full;
168public:
169    LegalGreek(UBool _full) { full = _full; }
170    virtual ~LegalGreek() {}
171
172    virtual UBool is(const UnicodeString& sourceString) const;
173
174    static UBool isVowel(UChar c);
175
176    static UBool isRho(UChar c);
177};
178
179UBool LegalGreek::is(const UnicodeString& sourceString) const {
180    UnicodeString decomp;
181    UErrorCode ec = U_ZERO_ERROR;
182    Normalizer::decompose(sourceString, FALSE, 0, decomp, ec);
183
184    // modern is simpler: don't care about anything but a grave
185    if (full == FALSE) {
186        // A special case which is legal but should be
187        // excluded from round trip
188        // if (sourceString == UnicodeString("\\u039C\\u03C0", "")) {
189        //    return FALSE;
190        // }
191        for (int32_t i = 0; i < decomp.length(); ++i) {
192            UChar c = decomp.charAt(i);
193            // exclude all the accents
194            if (c == 0x0313 || c == 0x0314 || c == 0x0300 || c == 0x0302
195                || c == 0x0342 || c == 0x0345
196                ) return FALSE;
197        }
198        return TRUE;
199    }
200
201    // Legal greek has breathing marks IFF there is a vowel or RHO at the start
202    // IF it has them, it has exactly one.
203    // IF it starts with a RHO, then the breathing mark must come before the second letter.
204    // Since there are no surrogates in greek, don't worry about them
205    UBool firstIsVowel = FALSE;
206    UBool firstIsRho = FALSE;
207    UBool noLetterYet = TRUE;
208    int32_t breathingCount = 0;
209    int32_t letterCount = 0;
210    for (int32_t i = 0; i < decomp.length(); ++i) {
211        UChar c = decomp.charAt(i);
212        if (u_isalpha(c)) {
213            ++letterCount;
214            if (noLetterYet) {
215                noLetterYet =  FALSE;
216                firstIsVowel = isVowel(c);
217                firstIsRho = isRho(c);
218            }
219            if (firstIsRho && letterCount == 2 && breathingCount == 0) {
220                return FALSE;
221            }
222        }
223        if (c == 0x0313 || c == 0x0314) {
224            ++breathingCount;
225        }
226    }
227
228    if (firstIsVowel || firstIsRho) return breathingCount == 1;
229    return breathingCount == 0;
230}
231
232UBool LegalGreek::isVowel(UChar c) {
233    switch (c) {
234    case 0x03B1:
235    case 0x03B5:
236    case 0x03B7:
237    case 0x03B9:
238    case 0x03BF:
239    case 0x03C5:
240    case 0x03C9:
241    case 0x0391:
242    case 0x0395:
243    case 0x0397:
244    case 0x0399:
245    case 0x039F:
246    case 0x03A5:
247    case 0x03A9:
248        return TRUE;
249    }
250    return FALSE;
251}
252
253UBool LegalGreek::isRho(UChar c) {
254    switch (c) {
255    case 0x03C1:
256    case 0x03A1:
257        return TRUE;
258    }
259    return FALSE;
260}
261
262// AbbreviatedUnicodeSetIterator Interface ---------------------------------------------
263//
264//      Iterate over a UnicodeSet, only returning a sampling of the contained code points.
265//        density is the approximate total number of code points to returned for the entire set.
266//
267
268class AbbreviatedUnicodeSetIterator : public UnicodeSetIterator {
269public :
270
271    AbbreviatedUnicodeSetIterator();
272    virtual ~AbbreviatedUnicodeSetIterator();
273    void reset(UnicodeSet& set, UBool abb = FALSE, int32_t density = 100);
274
275    /**
276     * ICU "poor man's RTTI", returns a UClassID for this class.
277     */
278    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
279
280    /**
281     * ICU "poor man's RTTI", returns a UClassID for the actual class.
282     */
283    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }
284
285private :
286    UBool abbreviated;
287    int32_t perRange;           // The maximum number of code points to be returned from each range
288    virtual void loadRange(int32_t range);
289
290    /**
291     * The address of this static class variable serves as this class's ID
292     * for ICU "poor man's RTTI".
293     */
294    static const char fgClassID;
295};
296
297// AbbreviatedUnicodeSetIterator Implementation ---------------------------------------
298
299const char AbbreviatedUnicodeSetIterator::fgClassID=0;
300
301AbbreviatedUnicodeSetIterator::AbbreviatedUnicodeSetIterator() :
302    UnicodeSetIterator(), abbreviated(FALSE) {
303}
304
305AbbreviatedUnicodeSetIterator::~AbbreviatedUnicodeSetIterator() {
306}
307
308void AbbreviatedUnicodeSetIterator::reset(UnicodeSet& newSet, UBool abb, int32_t density) {
309    UnicodeSetIterator::reset(newSet);
310    abbreviated = abb;
311    perRange = newSet.getRangeCount();
312    if (perRange != 0) {
313        perRange = density / perRange;
314    }
315}
316
317void AbbreviatedUnicodeSetIterator::loadRange(int32_t myRange) {
318    UnicodeSetIterator::loadRange(myRange);
319    if (abbreviated && (endElement > nextElement + perRange)) {
320        endElement = nextElement + perRange;
321    }
322}
323
324//--------------------------------------------------------------------
325// RTTest Interface
326//--------------------------------------------------------------------
327
328class RTTest : public IntlTest {
329
330    // PrintWriter out;
331
332    UnicodeString transliteratorID;
333    int32_t errorLimit;
334    int32_t errorCount;
335    int32_t pairLimit;
336    UnicodeSet sourceRange;
337    UnicodeSet targetRange;
338    UnicodeSet toSource;
339    UnicodeSet toTarget;
340    UnicodeSet roundtripExclusionsSet;
341    IntlTest* parent;
342    Legal* legalSource; // NOT owned
343    UnicodeSet badCharacters;
344
345public:
346
347    /*
348     * create a test for the given script transliterator.
349     */
350    RTTest(const UnicodeString& transliteratorIDStr);
351
352    virtual ~RTTest();
353
354    void setErrorLimit(int32_t limit);
355
356    void setPairLimit(int32_t limit);
357
358    void test(const UnicodeString& sourceRange,
359              const UnicodeString& targetRange,
360              const char* roundtripExclusions,
361              IntlTest* parent,
362              UBool     quick,
363              Legal* adoptedLegal,
364              int32_t density = 100);
365
366private:
367
368    // Added to do better equality check.
369
370    static UBool isSame(const UnicodeString& a, const UnicodeString& b);
371
372    static UBool isCamel(const UnicodeString& a);
373
374    UBool checkIrrelevants(Transliterator *t, const UnicodeString& irrelevants);
375
376    void test2(UBool quick, int32_t density);
377
378    void logWrongScript(const UnicodeString& label,
379                        const UnicodeString& from,
380                        const UnicodeString& to);
381
382    void logNotCanonical(const UnicodeString& label,
383                         const UnicodeString& from,
384                         const UnicodeString& to,
385                         const UnicodeString& fromCan,
386                         const UnicodeString& toCan);
387
388    void logFails(const UnicodeString& label);
389
390    void logToRulesFails(const UnicodeString& label,
391                         const UnicodeString& from,
392                         const UnicodeString& to,
393                         const UnicodeString& toCan);
394
395    void logRoundTripFailure(const UnicodeString& from,
396                             const UnicodeString& toID,
397                             const UnicodeString& to,
398                             const UnicodeString& backID,
399                             const UnicodeString& back);
400};
401
402//--------------------------------------------------------------------
403// RTTest Implementation
404//--------------------------------------------------------------------
405
406/*
407 * create a test for the given script transliterator.
408 */
409RTTest::RTTest(const UnicodeString& transliteratorIDStr) {
410    transliteratorID = transliteratorIDStr;
411    errorLimit = 500;
412    errorCount = 0;
413    pairLimit  = 0x10000;
414}
415
416RTTest::~RTTest() {
417}
418
419void RTTest::setErrorLimit(int32_t limit) {
420    errorLimit = limit;
421}
422
423void RTTest::setPairLimit(int32_t limit) {
424    pairLimit = limit;
425}
426
427UBool RTTest::isSame(const UnicodeString& a, const UnicodeString& b) {
428    if (a == b) return TRUE;
429    if (a.caseCompare(b, U_FOLD_CASE_DEFAULT)==0 && isCamel(a)) return TRUE;
430    UnicodeString aa, bb;
431    UErrorCode ec = U_ZERO_ERROR;
432    Normalizer::decompose(a, FALSE, 0, aa, ec);
433    Normalizer::decompose(b, FALSE, 0, bb, ec);
434    if (aa == bb) return TRUE;
435    if (aa.caseCompare(bb, U_FOLD_CASE_DEFAULT)==0 && isCamel(aa)) return TRUE;
436    return FALSE;
437}
438
439UBool RTTest::isCamel(const UnicodeString& a) {
440    // see if string is of the form aB; e.g. lower, then upper or title
441    UChar32 cp;
442    UBool haveLower = FALSE;
443    for (int32_t i = 0; i < a.length(); i += U16_LENGTH(cp)) {
444        cp = a.char32At(i);
445        int8_t t = u_charType(cp);
446        switch (t) {
447        case U_UPPERCASE_LETTER:
448            if (haveLower) return TRUE;
449            break;
450        case U_TITLECASE_LETTER:
451            if (haveLower) return TRUE;
452            // drop through, since second letter is lower.
453        case U_LOWERCASE_LETTER:
454            haveLower = TRUE;
455            break;
456        }
457    }
458    return FALSE;
459}
460
461void RTTest::test(const UnicodeString& sourceRangeVal,
462                  const UnicodeString& targetRangeVal,
463                  const char* roundtripExclusions,
464                  IntlTest* logVal, UBool quickRt,
465                  Legal* adoptedLegal,
466                  int32_t density)
467{
468
469    UErrorCode status = U_ZERO_ERROR;
470
471    this->parent = logVal;
472    this->legalSource = adoptedLegal;
473
474    UnicodeSet neverOk("[:Other:]", status);
475    UnicodeSet okAnyway("[^[:Letter:]]", status);
476
477    if (U_FAILURE(status)) {
478        parent->dataerrln("FAIL: Initializing UnicodeSet with [:Other:] or [^[:Letter:]] - Error: %s", u_errorName(status));
479        return;
480    }
481
482    this->sourceRange.clear();
483    this->sourceRange.applyPattern(sourceRangeVal, status);
484    if (U_FAILURE(status)) {
485        parent->errln("FAIL: UnicodeSet::applyPattern(" +
486                   sourceRangeVal + ")");
487        return;
488    }
489    this->sourceRange.removeAll(neverOk);
490
491    this->targetRange.clear();
492    this->targetRange.applyPattern(targetRangeVal, status);
493    if (U_FAILURE(status)) {
494        parent->errln("FAIL: UnicodeSet::applyPattern(" +
495                   targetRangeVal + ")");
496        return;
497    }
498    this->targetRange.removeAll(neverOk);
499
500    this->toSource.clear();
501    this->toSource.applyPattern(sourceRangeVal, status);
502    if (U_FAILURE(status)) {
503        parent->errln("FAIL: UnicodeSet::applyPattern(" +
504                   sourceRangeVal + ")");
505        return;
506    }
507    this->toSource.addAll(okAnyway);
508
509    this->toTarget.clear();
510    this->toTarget.applyPattern(targetRangeVal, status);
511    if (U_FAILURE(status)) {
512        parent->errln("FAIL: UnicodeSet::applyPattern(" +
513                   targetRangeVal + ")");
514        return;
515    }
516    this->toTarget.addAll(okAnyway);
517
518    this->roundtripExclusionsSet.clear();
519    if (roundtripExclusions != NULL && strlen(roundtripExclusions) > 0) {
520        this->roundtripExclusionsSet.applyPattern(UnicodeString(roundtripExclusions, -1, US_INV), status);
521        if (U_FAILURE(status)) {
522            parent->errln("FAIL: UnicodeSet::applyPattern(%s)", roundtripExclusions);
523            return;
524        }
525    }
526
527    badCharacters.clear();
528    badCharacters.applyPattern("[:Other:]", status);
529    if (U_FAILURE(status)) {
530        parent->errln("FAIL: UnicodeSet::applyPattern([:Other:])");
531        return;
532    }
533
534    test2(quickRt, density);
535
536    if (errorCount > 0) {
537        char str[100];
538        int32_t length = transliteratorID.extract(str, 100, NULL, status);
539        str[length] = 0;
540        parent->errln("FAIL: %s errors: %d %s", str, errorCount, (errorCount > errorLimit ? " (at least!)" : " ")); // + ", see " + logFileName);
541    } else {
542        char str[100];
543        int32_t length = transliteratorID.extract(str, 100, NULL, status);
544        str[length] = 0;
545        parent->logln("%s ok", str);
546    }
547}
548
549UBool RTTest::checkIrrelevants(Transliterator *t,
550                               const UnicodeString& irrelevants) {
551    for (int i = 0; i < irrelevants.length(); ++i) {
552        UChar c = irrelevants.charAt(i);
553        UnicodeString srcStr(c);
554        UnicodeString targ = srcStr;
555        t->transliterate(targ);
556        if (srcStr == targ) return TRUE;
557    }
558    return FALSE;
559}
560
/**
 * Runs the round-trip checks for transliteratorID using the sets prepared
 * by test(). All output goes through `parent`; failures are counted by the
 * log*() helpers.
 *
 * Phases, in order:
 *   1. Create the forward transliterator and its inverse; return if either
 *      is NULL.
 *   2. Check that at least one "irrelevant" character passes through each
 *      direction unchanged.
 *   3. (only when quickRt is false) Rebuild both transliterators from their
 *      toRules() output and compare results on every single code point.
 *   4. Source -> target on single code points; failures are collected in
 *      failSourceTarg.
 *   5. Source -> target on all pairs from the source range minus those
 *      failures.
 *   6. Target -> source -> target on single elements (including strings in
 *      the set); failures are collected in failTargSource / failRound.
 *   7. Target -> source -> target on all pairs from the target range minus
 *      those failures, stopping after pairLimit outer iterations.
 *
 * @param quickRt  skips phase 3 and is passed as the "abbreviated" flag to
 *                 the iterators of the pair loops.
 * @param density  passed to the iterators of the pair loops.
 */
void RTTest::test2(UBool quickRt, int32_t density) {

    UnicodeString srcStr, targ, reverse;
    UErrorCode status = U_ZERO_ERROR;
    UParseError parseError ;
    // Both transliterators are held by TransliteratorPointer, so every return
    // below releases them.
    TransliteratorPointer sourceToTarget(
        Transliterator::createInstance(transliteratorID, UTRANS_FORWARD, parseError,
                                       status));
    if ((Transliterator *)sourceToTarget == NULL) {
        parent->dataerrln("FAIL: createInstance(" + transliteratorID +
                   ") returned NULL. Error: " + u_errorName(status)
                   + "\n\tpreContext : " + prettify(parseError.preContext)
                   + "\n\tpostContext : " + prettify(parseError.postContext));

                return;
    }
    TransliteratorPointer targetToSource(sourceToTarget->createInverse(status));
    if ((Transliterator *)targetToSource == NULL) {
        // NOTE(review): createInverse() is not passed parseError, so the
        // contexts printed here are whatever createInstance() left behind.
        parent->errln("FAIL: " + transliteratorID +
                   ".createInverse() returned NULL. Error:" + u_errorName(status)
                   + "\n\tpreContext : " + prettify(parseError.preContext)
                   + "\n\tpostContext : " + prettify(parseError.postContext));
        return;
    }

    // usi drives every single-element loop and the outer pair loops;
    // usi2 drives the inner pair loops.
    AbbreviatedUnicodeSetIterator usi;
    AbbreviatedUnicodeSetIterator usi2;

    parent->logln("Checking that at least one irrelevant character is not NFC'ed");
    // string is from NFC_NO in the UCD
    UnicodeString irrelevants = CharsToUnicodeString("\\u2000\\u2001\\u2126\\u212A\\u212B\\u2329");

    if (checkIrrelevants(sourceToTarget, irrelevants) == FALSE) {
        logFails("Source-Target, irrelevants");
    }
    if (checkIrrelevants(targetToSource, irrelevants) == FALSE) {
        logFails("Target-Source, irrelevants");
    }

    // Phase 3: toRules() round trip, skipped in quick mode.
    if (!quickRt){
      parent->logln("Checking that toRules works");
      UnicodeString rules = "";

      // Shadows the function-level parseError for the rest of this block.
      UParseError parseError;
      rules = sourceToTarget->toRules(rules, TRUE);
      // parent->logln((UnicodeString)"toRules => " + rules);
      TransliteratorPointer sourceToTarget2(Transliterator::createFromRules(
                                                       "s2t2", rules,
                                                       UTRANS_FORWARD,
                                                       parseError, status));
      if (U_FAILURE(status)) {
          parent->errln("FAIL: createFromRules %s\n", u_errorName(status));
          return;
      }

      rules = targetToSource->toRules(rules, FALSE);
      TransliteratorPointer targetToSource2(Transliterator::createFromRules(
                                                       "t2s2", rules,
                                                       UTRANS_FORWARD,
                                                       parseError, status));
      if (U_FAILURE(status)) {
          parent->errln("FAIL: createFromRules %s\n", u_errorName(status));
          return;
      }

      // Every source code point must give the same result through the
      // original and the rebuilt forward transliterator. The loop ends at the
      // first string element of the set.
      usi.reset(sourceRange);
      for (;;) {
          if (!usi.next() || usi.isString()) break;
          UChar32 c = usi.getCodepoint();

          UnicodeString srcStr((UChar32)c);
          UnicodeString targ = srcStr;
          sourceToTarget->transliterate(targ);
          UnicodeString targ2 = srcStr;
          sourceToTarget2->transliterate(targ2);
          if (targ != targ2) {
              logToRulesFails("Source-Target, toRules", srcStr, targ, targ2);
          }
      }

      // Same comparison for the reverse direction over the target range.
      usi.reset(targetRange);
      for (;;) {
          if (!usi.next() || usi.isString()) break;
          UChar32 c = usi.getCodepoint();

          UnicodeString srcStr((UChar32)c);
          UnicodeString targ = srcStr;
          targetToSource->transliterate(targ);
          UnicodeString targ2 = srcStr;
          targetToSource2->transliterate(targ2);
          if (targ != targ2) {
              logToRulesFails("Target-Source, toRules", srcStr, targ, targ2);
          }
      }
    }

    // Phase 4: single source code points.
    parent->logln("Checking that all source characters convert to target - Singles");

    UnicodeSet failSourceTarg;
    usi.reset(sourceRange);
    for (;;) {
        if (!usi.next() || usi.isString()) break;
        UChar32 c = usi.getCodepoint();

        UnicodeString srcStr((UChar32)c);
        UnicodeString targ = srcStr;
        sourceToTarget->transliterate(targ);
        // The result must lie inside toTarget and contain no badCharacters.
        // If it does not, give it a second chance in decomposed form before
        // recording a wrong-script failure.
        if (toTarget.containsAll(targ) == FALSE
            || badCharacters.containsSome(targ) == TRUE) {
            UnicodeString targD;
            Normalizer::decompose(targ, FALSE, 0, targD, status);
            if (U_FAILURE(status)) {
                parent->errln("FAIL: Internal error during decomposition %s\n", u_errorName(status));
                return;
            }
            if (toTarget.containsAll(targD) == FALSE ||
                badCharacters.containsSome(targD) == TRUE) {
                logWrongScript("Source-Target", srcStr, targ);
                failSourceTarg.add(c);
                continue;
            }
        }

        // Transliterating the decomposed source must give the same result as
        // transliterating the source itself.
        UnicodeString cs2;
        Normalizer::decompose(srcStr, FALSE, 0, cs2, status);
        if (U_FAILURE(status)) {
            parent->errln("FAIL: Internal error during decomposition %s\n", u_errorName(status));
            return;
        }
        UnicodeString targ2 = cs2;
        sourceToTarget->transliterate(targ2);
        if (targ != targ2) {
            logNotCanonical("Source-Target", srcStr, targ,cs2, targ2);
        }
    }

    // Phase 5: pairs of source code points, excluding the single-character
    // failures found above.
    parent->logln("Checking that all source characters convert to target - Doubles");

    UnicodeSet sourceRangeMinusFailures(sourceRange);
    sourceRangeMinusFailures.removeAll(failSourceTarg);

    usi.reset(sourceRangeMinusFailures, quickRt, density);
    for (;;) {
        if (!usi.next() || usi.isString()) break;
        UChar32 c = usi.getCodepoint();

        usi2.reset(sourceRangeMinusFailures, quickRt, density);
        for (;;) {
            if (!usi2.next() || usi2.isString()) break;
            UChar32 d = usi2.getCodepoint();

            UnicodeString srcStr;
            srcStr += (UChar32)c;
            srcStr += (UChar32)d;
            UnicodeString targ = srcStr;
            sourceToTarget->transliterate(targ);
            if (toTarget.containsAll(targ) == FALSE ||
                badCharacters.containsSome(targ) == TRUE)
            {
                UnicodeString targD;
                Normalizer::decompose(targ, FALSE, 0, targD, status);
                if (U_FAILURE(status)) {
                    parent->errln("FAIL: Internal error during decomposition %s\n", u_errorName(status));
                    return;
                }
                if (toTarget.containsAll(targD) == FALSE ||
                    badCharacters.containsSome(targD) == TRUE) {
                    logWrongScript("Source-Target", srcStr, targ);
                    continue;
                }
            }
            UnicodeString cs2;
            Normalizer::decompose(srcStr, FALSE, 0, cs2, status);
            if (U_FAILURE(status)) {
                parent->errln("FAIL: Internal error during decomposition %s\n", u_errorName(status));
                return;
            }
            UnicodeString targ2 = cs2;
            sourceToTarget->transliterate(targ2);
            if (targ != targ2) {
                logNotCanonical("Source-Target", srcStr, targ, cs2,targ2);
            }
        }
    }

    // Phase 6: single target elements, converted to source and back.
    parent->logln("Checking that target characters convert to source and back - Singles");

    UnicodeSet failTargSource;
    UnicodeSet failRound;

    usi.reset(targetRange);
    for (;;) {
        if (!usi.next()) break;

        // Unlike the other loops, string elements of the set are tested too.
        if(usi.isString()){
            srcStr = usi.getString();
        }else{
            srcStr = (UnicodeString)usi.getCodepoint();
        }

        // The first code point of the element is what gets recorded in the
        // failure sets and looked up in the exclusion set.
        UChar32 c = srcStr.char32At(0);

        targ = srcStr;
        targetToSource->transliterate(targ);
        reverse = targ;
        sourceToTarget->transliterate(reverse);

        if (toSource.containsAll(targ) == FALSE ||
            badCharacters.containsSome(targ) == TRUE) {
            UnicodeString targD;
            Normalizer::decompose(targ, FALSE, 0, targD, status);
            if (U_FAILURE(status)) {
                parent->errln("FAIL: Internal error during decomposition%s\n", u_errorName(status));
                return;
            }
            if (toSource.containsAll(targD) == FALSE) {
                logWrongScript("Target-Source", srcStr, targ);
                failTargSource.add(c);
                continue;
            }
            if (badCharacters.containsSome(targD) == TRUE) {
                logWrongScript("Target-Source*", srcStr, targ);
                failTargSource.add(c);
                continue;
            }
        }
        // A round-trip mismatch is a failure unless the element (or its first
        // code point) is listed in the exclusion set.
        if (isSame(srcStr, reverse) == FALSE &&
            roundtripExclusionsSet.contains(c) == FALSE
            && roundtripExclusionsSet.contains(srcStr)==FALSE) {
            logRoundTripFailure(srcStr,targetToSource->getID(), targ,sourceToTarget->getID(), reverse);
            failRound.add(c);
            continue;
        }

        // The decomposed intermediate must convert back to the same result.
        UnicodeString targ2;
        Normalizer::decompose(targ, FALSE, 0, targ2, status);
        if (U_FAILURE(status)) {
            parent->errln("FAIL: Internal error during decomposition%s\n", u_errorName(status));
            return;
        }
        UnicodeString reverse2 = targ2;
        sourceToTarget->transliterate(reverse2);
        if (reverse != reverse2) {
            logNotCanonical("Target-Source", targ, reverse, targ2, reverse2);
        }
    }

    // Phase 7: pairs of target code points, excluding the single-element
    // failures found above.
    parent->logln("Checking that target characters convert to source and back - Doubles");
    int32_t count = 0;

    UnicodeSet targetRangeMinusFailures(targetRange);
    targetRangeMinusFailures.removeAll(failTargSource);
    targetRangeMinusFailures.removeAll(failRound);

    usi.reset(targetRangeMinusFailures, quickRt, density);
    // Scratch strings declared outside the loops and emptied with truncate(0)
    // on each use.
    UnicodeString targ2;
    UnicodeString reverse2;
    UnicodeString targD;
    for (;;) {
        if (!usi.next() || usi.isString()) break;
        UChar32 c = usi.getCodepoint();
        // count advances once per outer code point; beyond pairLimit the
        // phase is abandoned with a log message rather than an error.
        if (++count > pairLimit) {
            //throw new TestTruncated("Test truncated at " + pairLimit + " x 64k pairs");
            parent->logln("");
            parent->logln((UnicodeString)"Test truncated at " + pairLimit + " x 64k pairs");
            return;
        }

        usi2.reset(targetRangeMinusFailures, quickRt, density);
        for (;;) {
            if (!usi2.next() || usi2.isString())
                break;
            UChar32 d = usi2.getCodepoint();
            srcStr.truncate(0);  // empty the variable without construction/destruction
            srcStr += c;
            srcStr += d;

            targ = srcStr;
            targetToSource->transliterate(targ);
            reverse = targ;
            sourceToTarget->transliterate(reverse);

            if (toSource.containsAll(targ) == FALSE ||
                badCharacters.containsSome(targ) == TRUE)
            {
                targD.truncate(0);  // empty the variable without construction/destruction
                Normalizer::decompose(targ, FALSE, 0, targD, status);
                if (U_FAILURE(status)) {
                    parent->errln("FAIL: Internal error during decomposition%s\n",
                               u_errorName(status));
                    return;
                }
                if (toSource.containsAll(targD) == FALSE
                    || badCharacters.containsSome(targD) == TRUE)
                {
                    logWrongScript("Target-Source", srcStr, targ);
                    continue;
                }
            }
            // Either code point, or the pair as a string, being in the
            // exclusion set suppresses the round-trip failure.
            if (isSame(srcStr, reverse) == FALSE &&
                roundtripExclusionsSet.contains(c) == FALSE&&
                roundtripExclusionsSet.contains(d) == FALSE &&
                roundtripExclusionsSet.contains(srcStr)== FALSE)
            {
                logRoundTripFailure(srcStr,targetToSource->getID(), targ, sourceToTarget->getID(),reverse);
                continue;
            }

            targ2.truncate(0);  // empty the variable without construction/destruction
            Normalizer::decompose(targ, FALSE, 0, targ2, status);
            if (U_FAILURE(status)) {
                parent->errln("FAIL: Internal error during decomposition%s\n", u_errorName(status));
                return;
            }
            reverse2 = targ2;
            sourceToTarget->transliterate(reverse2);
            if (reverse != reverse2) {
                logNotCanonical("Target-Source", targ,reverse, targ2, reverse2);
            }
        }
    }
    parent->logln("");
}
884
885void RTTest::logWrongScript(const UnicodeString& label,
886                            const UnicodeString& from,
887                            const UnicodeString& to) {
888    parent->errln((UnicodeString)"FAIL " +
889               label + ": " +
890               from + "(" + TestUtility::hex(from) + ") => " +
891               to + "(" + TestUtility::hex(to) + ")");
892    ++errorCount;
893}
894
895void RTTest::logNotCanonical(const UnicodeString& label,
896                             const UnicodeString& from,
897                             const UnicodeString& to,
898                             const UnicodeString& fromCan,
899                             const UnicodeString& toCan) {
900    parent->errln((UnicodeString)"FAIL (can.equiv)" +
901               label + ": " +
902               from + "(" + TestUtility::hex(from) + ") => " +
903               to + "(" + TestUtility::hex(to) + ")" +
904               fromCan + "(" + TestUtility::hex(fromCan) + ") => " +
905               toCan + " (" +
906               TestUtility::hex(toCan) + ")"
907               );
908    ++errorCount;
909}
910
911void RTTest::logFails(const UnicodeString& label) {
912    parent->errln((UnicodeString)"<br>FAIL " + label);
913    ++errorCount;
914}
915
916void RTTest::logToRulesFails(const UnicodeString& label,
917                             const UnicodeString& from,
918                             const UnicodeString& to,
919                             const UnicodeString& otherTo)
920{
921    parent->errln((UnicodeString)"FAIL: " +
922               label + ": " +
923               from + "(" + TestUtility::hex(from) + ") => " +
924               to + "(" + TestUtility::hex(to) + ")" +
925               "!=" +
926               otherTo + " (" +
927               TestUtility::hex(otherTo) + ")"
928               );
929    ++errorCount;
930}
931
932
933void RTTest::logRoundTripFailure(const UnicodeString& from,
934                                 const UnicodeString& toID,
935                                 const UnicodeString& to,
936                                 const UnicodeString& backID,
937                                 const UnicodeString& back) {
938    if (legalSource->is(from) == FALSE) return; // skip illegals
939
940    parent->errln((UnicodeString)"FAIL Roundtrip: " +
941               from + "(" + TestUtility::hex(from) + ") => " +
942               to + "(" + TestUtility::hex(to) + ")  "+toID+" => " +
943               back + "(" + TestUtility::hex(back) + ") "+backID+" => ");
944    ++errorCount;
945}
946
947//--------------------------------------------------------------------
948// Specific Tests
949//--------------------------------------------------------------------
950
951    /*
952    Note: Unicode 3.2 added new Hiragana/Katakana characters:
953
9543095..3096    ; 3.2 #   [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE
955309F..30A0    ; 3.2 #   [2] HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOUBLE HYPHEN
95630FF          ; 3.2 #       KATAKANA DIGRAPH KOTO
95731F0..31FF    ; 3.2 #  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
958
959    Unicode 5.2 added another Hiragana character:
9601F200         ; 5.2 #       SQUARE HIRAGANA HOKA
961
962    We will not add them to the rules until they are more supported (e.g. in fonts on Windows)
963    A bug has been filed to remind us to do this: #1979.
964    */
965
// UnicodeSet patterns shared by the kana round-trip tests below.

// Katakana script plus U+30A1..U+30FA and U+30FC, minus U+30FF and
// U+31F0..U+31FF (see the note above), minus whatever [:^age=5.2:] matches.
static const char KATAKANA[] = "[[[:katakana:][\\u30A1-\\u30FA\\u30FC]]-[\\u30FF\\u31F0-\\u31FF]-[:^age=5.2:]]";
// Hiragana script plus U+3040..U+3094, minus U+3095..U+3096, U+309F..U+30A0
// and U+1F200..U+1F2FF, minus whatever [:^age=5.2:] matches.
static const char HIRAGANA[] = "[[[:hiragana:][\\u3040-\\u3094]]-[\\u3095-\\u3096\\u309F-\\u30A0\\U0001F200-\\U0001F2FF]-[:^age=5.2:]]";
// The single character U+30FC.
static const char LENGTH[] = "[\\u30FC]";
// U+FF65..U+FF9D; used as part of the round-trip exclusions.
static const char HALFWIDTH_KATAKANA[] = "[\\uFF65-\\uFF9D]";
// U+30FD and U+30FE; excluded from the Latin-Katakana round trip.
static const char KATAKANA_ITERATION[] = "[\\u30FD\\u30FE]";
// U+309D and U+309E; excluded from the Latin-Hiragana round trip.
static const char HIRAGANA_ITERATION[] = "[\\u309D\\u309E]";
// Size of the char buffers in which the tests assemble exclusion patterns.
static const int32_t TEMP_MAX=256;
973
974void TransliteratorRoundTripTest::TestKana() {
975    RTTest test("Katakana-Hiragana");
976    Legal *legal = new Legal();
977    char temp[TEMP_MAX];
978    strcpy(temp, "[");
979    strcat(temp, HALFWIDTH_KATAKANA);
980    strcat(temp, LENGTH);
981    strcat(temp, "]");
982    test.test(KATAKANA, UnicodeString("[") + HIRAGANA + LENGTH + UnicodeString("]"),
983              temp,
984              this, quick, legal);
985    delete legal;
986}
987
988void TransliteratorRoundTripTest::TestHiragana() {
989    RTTest test("Latin-Hiragana");
990    Legal *legal = new Legal();
991    test.test(UnicodeString("[a-zA-Z]", ""),
992              UnicodeString(HIRAGANA, -1, US_INV),
993              HIRAGANA_ITERATION, this, quick, legal);
994    delete legal;
995}
996
997void TransliteratorRoundTripTest::TestKatakana() {
998    RTTest test("Latin-Katakana");
999    Legal *legal = new Legal();
1000    char temp[TEMP_MAX];
1001    strcpy(temp, "[");
1002    strcat(temp, KATAKANA_ITERATION);
1003    strcat(temp, HALFWIDTH_KATAKANA);
1004    strcat(temp, "]");
1005    test.test(UnicodeString("[a-zA-Z]", ""),
1006              UnicodeString(KATAKANA, -1, US_INV),
1007              temp,
1008              this, quick, legal);
1009    delete legal;
1010}
1011
1012void TransliteratorRoundTripTest::TestJamo() {
1013    RTTest t("Latin-Jamo");
1014    Legal *legal = new LegalJamo();
1015    t.test(UnicodeString("[a-zA-Z]", ""),
1016           UnicodeString("[\\u1100-\\u1112 \\u1161-\\u1175 \\u11A8-\\u11C2]",
1017                         ""),
1018           NULL, this, quick, legal);
1019    delete legal;
1020}
1021
1022void TransliteratorRoundTripTest::TestHangul() {
1023    RTTest t("Latin-Hangul");
1024    Legal *legal = new Legal();
1025    if (quick) t.setPairLimit(1000);
1026    t.test(UnicodeString("[a-zA-Z]", ""),
1027           UnicodeString("[\\uAC00-\\uD7A4]", ""),
1028           NULL, this, quick, legal, 1);
1029    delete legal;
1030}
1031
1032
// ASSERT_SUCCESS(status): if `status` is a failure code, report the current
// file, line and error name through errcheckln() and return from the
// enclosing function.  Because it expands to a bare `return;`, it can only be
// used inside functions returning void, and anything the caller owns through
// raw pointers at that point is not released.
#define ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
     errcheckln(status, "error at file %s, line %d, status = %s", __FILE__, __LINE__, \
         u_errorName(status)); \
         return;}}
1037
1038
1039static void writeStringInU8(FILE *out, const UnicodeString &s) {
1040    int i;
1041    for (i=0; i<s.length(); i=s.moveIndex32(i, 1)) {
1042        UChar32  c = s.char32At(i);
1043        uint8_t  bufForOneChar[10];
1044        UBool    isError = FALSE;
1045        int32_t  destIdx = 0;
1046        U8_APPEND(bufForOneChar, destIdx, (int32_t)sizeof(bufForOneChar), c, isError);
1047        fwrite(bufForOneChar, 1, destIdx, out);
1048    }
1049}
1050
1051
1052
1053
1054void TransliteratorRoundTripTest::TestHan() {
1055    UErrorCode  status = U_ZERO_ERROR;
1056    LocalULocaleDataPointer uld(ulocdata_open("zh",&status));
1057    LocalUSetPointer USetExemplars(ulocdata_getExemplarSet(uld.getAlias(), uset_openEmpty(), 0, ULOCDATA_ES_STANDARD, &status));
1058    ASSERT_SUCCESS(status);
1059
1060    UnicodeString source;
1061    UChar32       c;
1062    int           i;
1063    for (i=0; ;i++) {
1064        // Add all of the Chinese exemplar chars to the string "source".
1065        c = uset_charAt(USetExemplars.getAlias(), i);
1066        if (c == (UChar32)-1) {
1067            break;
1068        }
1069        source.append(c);
1070    }
1071
1072    // transform with Han translit
1073    Transliterator *hanTL = Transliterator::createInstance("Han-Latin", UTRANS_FORWARD, status);
1074    ASSERT_SUCCESS(status);
1075    UnicodeString target=source;
1076    hanTL->transliterate(target);
1077    // now verify that there are no Han characters left
1078    UnicodeSet allHan("[:han:]", status);
1079    ASSERT_SUCCESS(status);
1080    if (allHan.containsSome(target)) {
1081        errln("file %s, line %d, No Han must be left after Han-Latin transliteration",
1082            __FILE__, __LINE__);
1083    }
1084
1085    // check the pinyin translit
1086    Transliterator *pn = Transliterator::createInstance("Latin-NumericPinyin", UTRANS_FORWARD, status);
1087    ASSERT_SUCCESS(status);
1088    UnicodeString target2 = target;
1089    pn->transliterate(target2);
1090
1091    // verify that there are no marks
1092    Transliterator *nfd = Transliterator::createInstance("nfd", UTRANS_FORWARD, status);
1093    ASSERT_SUCCESS(status);
1094
1095    UnicodeString nfded = target2;
1096    nfd->transliterate(nfded);
1097    UnicodeSet allMarks(UNICODE_STRING_SIMPLE("[\\u0304\\u0301\\u030C\\u0300\\u0306]"), status); // look only for Pinyin tone marks, not all marks (there are some others in there)
1098    ASSERT_SUCCESS(status);
1099    assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfded));
1100
1101    // verify roundtrip
1102    Transliterator *np = pn->createInverse(status);
1103    ASSERT_SUCCESS(status);
1104    UnicodeString target3 = target2;
1105    np->transliterate(target3);
1106    UBool roundtripOK = (target3.compare(target) == 0);
1107    assertTrue("NumericPinyin must roundtrip", roundtripOK);
1108    if (!roundtripOK) {
1109        const char *filename = "numeric-pinyin.log.txt";
1110        FILE *out = fopen(filename, "w");
1111        errln("Creating log file %s\n", filename);
1112        fprintf(out, "Pinyin:                ");
1113        writeStringInU8(out, target);
1114        fprintf(out, "\nPinyin-Numeric-Pinyin: ");
1115        writeStringInU8(out, target2);
1116        fprintf(out, "\nNumeric-Pinyin-Pinyin: ");
1117        writeStringInU8(out, target3);
1118        fprintf(out, "\n");
1119        fclose(out);
1120    }
1121
1122    delete hanTL;
1123    delete pn;
1124    delete nfd;
1125    delete np;
1126}
1127
1128
1129void TransliteratorRoundTripTest::TestGreek() {
1130    logKnownIssue( "cldrbug:1911");
1131    // It is left in its current state as a regression test.
1132
1133    RTTest test("Latin-Greek");
1134    LegalGreek *legal = new LegalGreek(TRUE);
1135
1136    test.test(UnicodeString("[a-zA-Z]", ""),
1137        UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
1138            "\\u1D26-\\u1D2A" // L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
1139            "\\u1D5D-\\u1D61" // Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
1140            "\\u1D66-\\u1D6A" // L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
1141            "\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
1142            "] & [:Age=4.0:]]",
1143
1144              //UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fcd\\u1fce\\u1fdd\\u1fde\\u1fed-\\u1fef\\u1ffd\\u03D7-\\u03EF]]&[:Age=3.2:]]",
1145                            ""),
1146              "[\\u00B5\\u037A\\u03D0-\\u03F5\\u03f9]", /* exclusions */
1147              this, quick, legal, 50);
1148
1149
1150    delete legal;
1151}
1152
1153
1154void TransliteratorRoundTripTest::TestGreekUNGEGN() {
1155    logKnownIssue( "cldrbug:1911");
1156    // It is left in its current state as a regression test.
1157
1158    RTTest test("Latin-Greek/UNGEGN");
1159    LegalGreek *legal = new LegalGreek(FALSE);
1160
1161    test.test(UnicodeString("[a-zA-Z]", ""),
1162        UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
1163            "\\u1D26-\\u1D2A" // L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
1164            "\\u1D5D-\\u1D61" // Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
1165            "\\u1D66-\\u1D6A" // L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
1166            "\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
1167            "] & [:Age=4.0:]]",
1168              //UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
1169                            ""),
1170              "[\\u0385\\u00B5\\u037A\\u03D0-\\uFFFF {\\u039C\\u03C0}]", /* roundtrip exclusions */
1171              this, quick, legal);
1172
1173    delete legal;
1174}
1175
1176void TransliteratorRoundTripTest::Testel() {
1177    logKnownIssue( "cldrbug:1911");
1178    // It is left in its current state as a regression test.
1179
1180    RTTest test("Latin-el");
1181    LegalGreek *legal = new LegalGreek(FALSE);
1182
1183    test.test(UnicodeString("[a-zA-Z]", ""),
1184        UnicodeString("[\\u003B\\u00B7[[:Greek:]&[:Letter:]]-["
1185            "\\u1D26-\\u1D2A" // L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
1186            "\\u1D5D-\\u1D61" // Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
1187            "\\u1D66-\\u1D6A" // L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
1188            "\\u03D7-\\u03EF" // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
1189            "] & [:Age=4.0:]]",
1190              //UnicodeString("[[\\u003B\\u00B7[:Greek:]-[\\u0374\\u0385\\u1fce\\u1fde\\u03D7-\\u03EF]]&[:Age=3.2:]]",
1191                            ""),
1192              "[\\u00B5\\u037A\\u03D0-\\uFFFF {\\u039C\\u03C0}]", /* exclusions */
1193              this, quick, legal);
1194
1195
1196    delete legal;
1197}
1198
1199
1200void TransliteratorRoundTripTest::TestArabic() {
1201    UnicodeString ARABIC("[\\u060C\\u061B\\u061F\\u0621\\u0627-\\u063A\\u0641-\\u0655\\u0660-\\u066C\\u067E\\u0686\\u0698\\u06A4\\u06AD\\u06AF\\u06CB-\\u06CC\\u06F0-\\u06F9]", -1, US_INV);
1202    Legal *legal = new Legal();
1203    RTTest test("Latin-Arabic");
1204        test.test(UNICODE_STRING_SIMPLE("[a-zA-Z\\u02BE\\u02BF\\u207F]"), ARABIC, "[a-zA-Z\\u02BE\\u02BF\\u207F]",this, quick, legal); //
1205   delete legal;
1206}
/**
 * Legality checker for Hebrew source strings.  is() accepts or rejects a
 * string according to whether characters from FINAL and NON_FINAL are, or
 * are not, followed by a character from LETTER.
 */
class LegalHebrew : public Legal {
private:
    UnicodeSet FINAL;      // U+05DA, U+05DD, U+05DF, U+05E3, U+05E5 (set in the constructor)
    UnicodeSet NON_FINAL;  // U+05DB, U+05DE, U+05E0, U+05E4, U+05E6 (set in the constructor)
    UnicodeSet LETTER;     // [:letter:]
public:
    // Builds the three sets; a pattern failure is reported through `error`.
    LegalHebrew(UErrorCode& error);
    virtual ~LegalHebrew() {}
    // Returns TRUE when sourceString passes the final/non-final check.
    virtual UBool is(const UnicodeString& sourceString) const;
};
1217
1218LegalHebrew::LegalHebrew(UErrorCode& error){
1219    FINAL.applyPattern(UNICODE_STRING_SIMPLE("[\\u05DA\\u05DD\\u05DF\\u05E3\\u05E5]"), error);
1220    NON_FINAL.applyPattern(UNICODE_STRING_SIMPLE("[\\u05DB\\u05DE\\u05E0\\u05E4\\u05E6]"), error);
1221    LETTER.applyPattern("[:letter:]", error);
1222}
1223UBool LegalHebrew::is(const UnicodeString& sourceString)const{
1224
1225    if (sourceString.length() == 0) return TRUE;
1226    // don't worry about surrogates.
1227    for (int i = 0; i < sourceString.length(); ++i) {
1228        UChar ch = sourceString.charAt(i);
1229        UChar next = i+1 == sourceString.length() ? 0x0000 : sourceString.charAt(i);
1230        if (FINAL.contains(ch)) {
1231            if (LETTER.contains(next)) return FALSE;
1232        } else if (NON_FINAL.contains(ch)) {
1233            if (!LETTER.contains(next)) return FALSE;
1234        }
1235    }
1236    return TRUE;
1237}
1238void TransliteratorRoundTripTest::TestHebrew() {
1239    logKnownIssue( "cldrbug:1911");
1240    // It is left in its current state as a regression test.
1241
1242    //long start = System.currentTimeMillis();
1243    UErrorCode error = U_ZERO_ERROR;
1244    LegalHebrew* legal = new LegalHebrew(error);
1245    if(U_FAILURE(error)){
1246        dataerrln("Could not construct LegalHebrew object. Error: %s", u_errorName(error));
1247        return;
1248    }
1249    RTTest test("Latin-Hebrew");
1250    test.test(UNICODE_STRING_SIMPLE("[a-zA-Z\\u02BC\\u02BB]"), UNICODE_STRING_SIMPLE("[[[:hebrew:]-[\\u05BD\\uFB00-\\uFBFF]]&[:Age=4.0:]]"), "[\\u05F0\\u05F1\\u05F2]", this, quick, legal);
1251
1252    //showElapsed(start, "TestHebrew");
1253    delete legal;
1254}
1255void TransliteratorRoundTripTest::TestCyrillic() {
1256    RTTest test("Latin-Cyrillic");
1257    Legal *legal = new Legal();
1258
1259    test.test(UnicodeString("[a-zA-Z\\u0110\\u0111\\u02BA\\u02B9]", ""),
1260              UnicodeString("[[\\u0400-\\u045F] & [:Age=3.2:]]", ""), NULL, this, quick,
1261              legal);
1262
1263    delete legal;
1264}
1265
1266
1267// Inter-Indic Tests ----------------------------------
/**
 * Legality checker for Indic source strings.  is() rejects strings based on
 * which of the sets below contains their first character (and, for
 * chandrabindu, their second character as well).  The sets are filled in by
 * the constructor.
 */
class LegalIndic :public Legal{
    UnicodeSet vowelSignSet;         // union of the per-script patterns added in the constructor
    UnicodeSet avagraha;
    UnicodeSet nukta;
    UnicodeSet virama;
    UnicodeSet sanskritStressSigns;
    UnicodeSet chandrabindu;

public:
    LegalIndic();
    // Returns FALSE when sourceString begins with a character that may not start a string.
    virtual UBool is(const UnicodeString& sourceString) const;
    virtual ~LegalIndic() {};
};
1281UBool LegalIndic::is(const UnicodeString& sourceString) const{
1282    int cp=sourceString.charAt(0);
1283
1284    // A vowel sign cannot be the first char
1285    if(vowelSignSet.contains(cp)){
1286        return FALSE;
1287    }else if(avagraha.contains(cp)){
1288        return FALSE;
1289    }else if(virama.contains(cp)){
1290        return FALSE;
1291    }else if(nukta.contains(cp)){
1292        return FALSE;
1293    }else if(sanskritStressSigns.contains(cp)){
1294        return FALSE;
1295    }else if(chandrabindu.contains(cp) &&
1296                ((sourceString.length()>1) &&
1297                    vowelSignSet.contains(sourceString.charAt(1)))){
1298        return FALSE;
1299    }
1300    return TRUE;
1301}
1302LegalIndic::LegalIndic(){
1303        UErrorCode status = U_ZERO_ERROR;
1304        vowelSignSet.addAll( UnicodeSet("[\\u0902\\u0903\\u0904\\u093e-\\u094c\\u0962\\u0963]",status));/* Devanagari */
1305        vowelSignSet.addAll( UnicodeSet("[\\u0982\\u0983\\u09be-\\u09cc\\u09e2\\u09e3\\u09D7]",status));/* Bengali */
1306        vowelSignSet.addAll( UnicodeSet("[\\u0a02\\u0a03\\u0a3e-\\u0a4c\\u0a62\\u0a63\\u0a70\\u0a71]",status));/* Gurmukhi */
1307        vowelSignSet.addAll( UnicodeSet("[\\u0a82\\u0a83\\u0abe-\\u0acc\\u0ae2\\u0ae3]",status));/* Gujarati */
1308        vowelSignSet.addAll( UnicodeSet("[\\u0b02\\u0b03\\u0b3e-\\u0b4c\\u0b62\\u0b63\\u0b56\\u0b57]",status));/* Oriya */
1309        vowelSignSet.addAll( UnicodeSet("[\\u0b82\\u0b83\\u0bbe-\\u0bcc\\u0be2\\u0be3\\u0bd7]",status));/* Tamil */
1310        vowelSignSet.addAll( UnicodeSet("[\\u0c02\\u0c03\\u0c3e-\\u0c4c\\u0c62\\u0c63\\u0c55\\u0c56]",status));/* Telugu */
1311        vowelSignSet.addAll( UnicodeSet("[\\u0c82\\u0c83\\u0cbe-\\u0ccc\\u0ce2\\u0ce3\\u0cd5\\u0cd6]",status));/* Kannada */
1312        vowelSignSet.addAll( UnicodeSet("[\\u0d02\\u0d03\\u0d3e-\\u0d4c\\u0d62\\u0d63\\u0d57]",status));/* Malayalam */
1313
1314        avagraha.addAll(UnicodeSet("[\\u093d\\u09bd\\u0abd\\u0b3d\\u0cbd]",status));
1315        nukta.addAll(UnicodeSet("[\\u093c\\u09bc\\u0a3c\\u0abc\\u0b3c\\u0cbc]",status));
1316        virama.addAll(UnicodeSet("[\\u094d\\u09cd\\u0a4d\\u0acd\\u0b4d\\u0bcd\\u0c4d\\u0ccd\\u0d4d]",status));
1317        sanskritStressSigns.addAll(UnicodeSet("[\\u0951\\u0952\\u0953\\u0954\\u097d]",status));
1318        chandrabindu.addAll(UnicodeSet("[\\u0901\\u0981\\u0A81\\u0b01\\u0c01]",status));
1319
1320    }
1321
// UnicodeSet pattern for the Latin-side source characters, passed as the
// source range by TestDevanagariLatin.  It is an explicit list of characters
// and ranges, from which U+E000..U+E080, U+01E2 and U+01E3 are removed, and
// the result is intersected with [[:latin:][:mark:]].
static const char latinForIndic[] = "[['.0-9A-Za-z~\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
                                   "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD\\u00FF-\\u010F"
                                   "\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-\\u013E\\u0143-\\u0148"
                                   "\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017E\\u01A0-\\u01A1\\u01AF-\\u01B0"
                                   "\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01ED\\u01F0\\u01F4-\\u01F5\\u01F8-\\u01FB"
                                   "\\u0200-\\u021B\\u021E-\\u021F\\u0226-\\u0233\\u0294\\u0303-\\u0304\\u0306\\u0314-\\u0315"
                                   "\\u0325\\u040E\\u0419\\u0439\\u045E\\u04C1-\\u04C2\\u04D0-\\u04D1\\u04D6-\\u04D7"
                                   "\\u04E2-\\u04E3\\u04EE-\\u04EF\\u1E00-\\u1E99\\u1EA0-\\u1EF9\\u1F01\\u1F03\\u1F05"
                                   "\\u1F07\\u1F09\\u1F0B\\u1F0D\\u1F0F\\u1F11\\u1F13\\u1F15\\u1F19\\u1F1B\\u1F1D\\u1F21"
                                   "\\u1F23\\u1F25\\u1F27\\u1F29\\u1F2B\\u1F2D\\u1F2F\\u1F31\\u1F33\\u1F35\\u1F37\\u1F39"
                                   "\\u1F3B\\u1F3D\\u1F3F\\u1F41\\u1F43\\u1F45\\u1F49\\u1F4B\\u1F4D\\u1F51\\u1F53\\u1F55"
                                   "\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F\\u1F61\\u1F63\\u1F65\\u1F67\\u1F69\\u1F6B\\u1F6D"
                                   "\\u1F6F\\u1F81\\u1F83\\u1F85\\u1F87\\u1F89\\u1F8B\\u1F8D\\u1F8F\\u1F91\\u1F93\\u1F95"
                                   "\\u1F97\\u1F99\\u1F9B\\u1F9D\\u1F9F\\u1FA1\\u1FA3\\u1FA5\\u1FA7\\u1FA9\\u1FAB\\u1FAD"
                                   "\\u1FAF-\\u1FB1\\u1FB8-\\u1FB9\\u1FD0-\\u1FD1\\u1FD8-\\u1FD9\\u1FE0-\\u1FE1\\u1FE5"
                                   "\\u1FE8-\\u1FE9\\u1FEC\\u212A-\\u212B\\uE04D\\uE064]"
                                   "-[\\uE000-\\uE080 \\u01E2\\u01E3]& [[:latin:][:mark:]]]";
1339
1340void TransliteratorRoundTripTest::TestDevanagariLatin() {
1341    {
1342        UErrorCode status = U_ZERO_ERROR;
1343        UParseError parseError;
1344        TransliteratorPointer t1(Transliterator::createInstance("[\\u0964-\\u0965\\u0981-\\u0983\\u0985-\\u098C\\u098F-\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BC\\u09BE-\\u09C4\\u09C7-\\u09C8\\u09CB-\\u09CD\\u09D7\\u09DC-\\u09DD\\u09DF-\\u09E3\\u09E6-\\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC;",UTRANS_FORWARD, parseError, status));
1345        if((Transliterator *)t1 != NULL){
1346            TransliteratorPointer t2(t1->createInverse(status));
1347            if(U_FAILURE(status)){
1348                errln("FAIL: could not create the Inverse:-( \n");
1349            }
1350        }else {
1351            dataerrln("FAIL: could not create the transliterator. Error: %s\n", u_errorName(status));
1352        }
1353
1354    }
1355    RTTest test("Latin-Devanagari");
1356    Legal *legal = new LegalIndic();
1357    logKnownIssue( "cldrbug:1911");
1358    // It is left in its current state as a regression test.
1359
1360    test.test(UnicodeString(latinForIndic, ""),
1361        UnicodeString("[[[:Devanagari:][\\u094d][\\u0964\\u0965]]&[:Age=4.1:]-[\\u0970]]", ""), "[\\u0965\\u0904]", this, quick,
1362            legal, 50);
1363
1364    delete legal;
1365}
1366
/* Defined this way for HP/UX11CC :-( */
/* Each entry of interIndicArray below is written as four consecutive strings:
 * transliterator ID, source set, target set, round-trip exclusions.
 * NOTE(review): presumably this constant is that per-entry stride -- confirm
 * against the code that walks the array. */
static const int32_t INTER_INDIC_ARRAY_WIDTH = 4;
1369static const char * const interIndicArray[] = {
1370
1371    "BENGALI-DEVANAGARI", "[:BENGALI:]", "[[:Devanagari:]-[\\u0970]]",
1372    "[\\u0904\\u0951-\\u0954\\u0943-\\u0949\\u094a\\u0962\\u0963\\u090D\\u090e\\u0911\\u0912\\u0929\\u0933\\u0934\\u0935\\u093d\\u0950\\u0958\\u0959\\u095a\\u095b\\u095e\\u097d]", /*roundtrip exclusions*/
1373
1374    "DEVANAGARI-BENGALI", "[[:Devanagari:]-[\\u0970]]", "[:BENGALI:]",
1375    "[\\u0951-\\u0954\\u0951-\\u0954\\u09D7\\u090D\\u090e\\u0911\\u0912\\u0929\\u0933\\u0934\\u0935\\u093d\\u0950\\u0958\\u0959\\u095a\\u095b\\u095e\\u09f0\\u09f1\\u09f2-\\u09fa\\u09ce]", /*roundtrip exclusions*/
1376
1377    "GURMUKHI-DEVANAGARI", "[:GURMUKHI:]", "[[:Devanagari:]-[\\u0970]]",
1378    "[\\u0904\\u0901\\u0902\\u0936\\u0933\\u0951-\\u0954\\u0902\\u0903\\u0943-\\u0949\\u094a\\u0962\\u0963\\u090B\\u090C\\u090D\\u090e\\u0911\\u0912\\u0934\\u0937\\u093D\\u0950\\u0960\\u0961\\u097d]", /*roundtrip exclusions*/
1379
1380    "DEVANAGARI-GURMUKHI", "[[:Devanagari:]-[\\u0970]]", "[:GURMUKHI:]",
1381    "[\\u0904\\u0A02\\u0946\\u0A5C\\u0951-\\u0954\\u0A70\\u0A71\\u090B\\u090C\\u090D\\u090e\\u0911\\u0912\\u0934\\u0937\\u093D\\u0950\\u0960\\u0961\\u0a72\\u0a73\\u0a74]", /*roundtrip exclusions*/
1382
1383    "GUJARATI-DEVANAGARI", "[:GUJARATI:]", "[[:Devanagari:]-[\\u0970]]",
1384    "[\\u0946\\u094A\\u0962\\u0963\\u0951-\\u0954\\u0961\\u090c\\u090e\\u0912\\u097d]", /*roundtrip exclusions*/
1385
1386    "DEVANAGARI-GUJARATI", "[[:Devanagari:]-[\\u0970]]", "[:GUJARATI:]",
1387    "[\\u0951-\\u0954\\u0961\\u090c\\u090e\\u0912]", /*roundtrip exclusions*/
1388
1389    "ORIYA-DEVANAGARI", "[:ORIYA:]", "[[:Devanagari:]-[\\u0970]]",
1390    "[\\u0904\\u0943-\\u094a\\u0962\\u0963\\u0951-\\u0954\\u0950\\u090D\\u090e\\u0912\\u0911\\u0931\\u0935\\u097d]", /*roundtrip exclusions*/
1391
1392    "DEVANAGARI-ORIYA", "[[:Devanagari:]-[\\u0970]]", "[:ORIYA:]",
1393    "[\\u0b5f\\u0b56\\u0b57\\u0b70\\u0b71\\u0950\\u090D\\u090e\\u0912\\u0911\\u0931]", /*roundtrip exclusions*/
1394
1395    "Tamil-DEVANAGARI", "[:tamil:]", "[[:Devanagari:]-[\\u0970]]",
1396    "[\\u0901\\u0904\\u093c\\u0943-\\u094a\\u0951-\\u0954\\u0962\\u0963\\u090B\\u090C\\u090D\\u0911\\u0916\\u0917\\u0918\\u091B\\u091D\\u0920\\u0921\\u0922\\u0925\\u0926\\u0927\\u092B\\u092C\\u092D\\u0936\\u093d\\u0950[\\u0958-\\u0961]\\u097d]", /*roundtrip exclusions*/
1397
1398    "DEVANAGARI-Tamil", "[[:Devanagari:]-[\\u0970]]", "[:tamil:]",
1399    "[\\u0bd7\\u0BF0\\u0BF1\\u0BF2]", /*roundtrip exclusions*/
1400
1401    "Telugu-DEVANAGARI", "[:telugu:]", "[[:Devanagari:]-[\\u0970]]",
1402    "[\\u0904\\u093c\\u0950\\u0945\\u0949\\u0951-\\u0954\\u0962\\u0963\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]\\u097d]", /*roundtrip exclusions*/
1403
1404    "DEVANAGARI-TELUGU", "[[:Devanagari:]-[\\u0970]]", "[:TELUGU:]",
1405    "[\\u0c55\\u0c56\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
1406
1407    "KANNADA-DEVANAGARI", "[:KANNADA:]", "[[:Devanagari:]-[\\u0970]]",
1408    "[\\u0901\\u0904\\u0946\\u093c\\u0950\\u0945\\u0949\\u0951-\\u0954\\u0962\\u0963\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]\\u097d]", /*roundtrip exclusions*/
1409
1410    "DEVANAGARI-KANNADA", "[[:Devanagari:]-[\\u0970]]", "[:KANNADA:]",
1411    "[{\\u0cb0\\u0cbc}{\\u0cb3\\u0cbc}\\u0cde\\u0cd5\\u0cd6\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
1412
1413    "MALAYALAM-DEVANAGARI", "[:MALAYALAM:]", "[[:Devanagari:]-[\\u0970]]",
1414    "[\\u0901\\u0904\\u094a\\u094b\\u094c\\u093c\\u0950\\u0944\\u0945\\u0949\\u0951-\\u0954\\u0962\\u0963\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]\\u097d]", /*roundtrip exclusions*/
1415
1416    "DEVANAGARI-MALAYALAM", "[[:Devanagari:]-[\\u0970]]", "[:MALAYALAM:]",
1417    "[\\u0d4c\\u0d57\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
1418
1419    "GURMUKHI-BENGALI", "[:GURMUKHI:]", "[:BENGALI:]",
1420    "[\\u0981\\u0982\\u09b6\\u09e2\\u09e3\\u09c3\\u09c4\\u09d7\\u098B\\u098C\\u09B7\\u09E0\\u09E1\\u09F0\\u09F1\\u09f2-\\u09fa\\u09ce]", /*roundtrip exclusions*/
1421
1422    "BENGALI-GURMUKHI", "[:BENGALI:]", "[:GURMUKHI:]",
1423    "[\\u0A02\\u0a5c\\u0a47\\u0a70\\u0a71\\u0A33\\u0A35\\u0A59\\u0A5A\\u0A5B\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
1424
1425    "GUJARATI-BENGALI", "[:GUJARATI:]", "[:BENGALI:]",
1426    "[\\u09d7\\u09e2\\u09e3\\u098c\\u09e1\\u09f0\\u09f1\\u09f2-\\u09fa\\u09ce]", /*roundtrip exclusions*/
1427
1428    "BENGALI-GUJARATI", "[:BENGALI:]", "[:GUJARATI:]",
1429    "[\\u0A82\\u0a83\\u0Ac9\\u0Ac5\\u0ac7\\u0A8D\\u0A91\\u0AB3\\u0AB5\\u0ABD\\u0AD0]", /*roundtrip exclusions*/
1430
1431    "ORIYA-BENGALI", "[:ORIYA:]", "[:BENGALI:]",
1432    "[\\u09c4\\u09e2\\u09e3\\u09f0\\u09f1\\u09f2-\\u09fa\\u09ce]", /*roundtrip exclusions*/
1433
1434    "BENGALI-ORIYA", "[:BENGALI:]", "[:ORIYA:]",
1435    "[\\u0b35\\u0b71\\u0b5f\\u0b56\\u0b33\\u0b3d]", /*roundtrip exclusions*/
1436
1437    "Tamil-BENGALI", "[:tamil:]", "[:BENGALI:]",
1438    "[\\u0981\\u09bc\\u09c3\\u09c4\\u09e2\\u09e3\\u09f0\\u09f1\\u098B\\u098C\\u0996\\u0997\\u0998\\u099B\\u099D\\u09A0\\u09A1\\u09A2\\u09A5\\u09A6\\u09A7\\u09AB\\u09AC\\u09AD\\u09B6\\u09DC\\u09DD\\u09DF\\u09E0\\u09E1\\u09f2-\\u09fa\\u09ce]", /*roundtrip exclusions*/
1439
1440    "BENGALI-Tamil", "[:BENGALI:]", "[:tamil:]",
1441    "[\\u0bc6\\u0bc7\\u0bca\\u0B8E\\u0B92\\u0BA9\\u0BB1\\u0BB3\\u0BB4\\u0BB5\\u0BF0\\u0BF1\\u0BF2]", /*roundtrip exclusions*/
1442
1443    "Telugu-BENGALI", "[:telugu:]", "[:BENGALI:]",
1444    "[\\u09e2\\u09e3\\u09bc\\u09d7\\u09f0\\u09f1\\u09dc\\u09dd\\u09df\\u09f2-\\u09fa\\u09ce]", /*roundtrip exclusions*/
1445
1446    "BENGALI-TELUGU", "[:BENGALI:]", "[:TELUGU:]",
1447    "[\\u0c55\\u0c56\\u0c47\\u0c46\\u0c4a\\u0C0E\\u0C12\\u0C31\\u0C33\\u0C35]", /*roundtrip exclusions*/
1448
1449    "KANNADA-BENGALI", "[:KANNADA:]", "[:BENGALI:]",
1450    "[\\u0981\\u09e2\\u09e3\\u09bc\\u09d7\\u09dc\\u09dd\\u09df\\u09f0\\u09f1\\u09f2-\\u09fa\\u09ce]", /*roundtrip exclusions*/
1451
1452    "BENGALI-KANNADA", "[:BENGALI:]", "[:KANNADA:]",
1453    "[{\\u0cb0\\u0cbc}{\\u0cb3\\u0cbc}\\u0cc6\\u0cca\\u0cd5\\u0cd6\\u0cc7\\u0C8E\\u0C92\\u0CB1\\u0cb3\\u0cb5\\u0cde]", /*roundtrip exclusions*/
1454
1455    "MALAYALAM-BENGALI", "[:MALAYALAM:]", "[:BENGALI:]",
1456    "[\\u0981\\u09e2\\u09e3\\u09bc\\u09c4\\u09f0\\u09f1\\u09dc\\u09dd\\u09df\\u09dc\\u09dd\\u09df\\u09f2-\\u09fa\\u09ce]", /*roundtrip exclusions*/
1457
1458    "BENGALI-MALAYALAM", "[:BENGALI:]", "[:MALAYALAM:]",
1459    "[\\u0d46\\u0d4a\\u0d47\\u0d31-\\u0d35\\u0d0e\\u0d12]", /*roundtrip exclusions*/
1460
1461    "GUJARATI-GURMUKHI", "[:GUJARATI:]", "[:GURMUKHI:]",
1462    "[\\u0A02\\u0ab3\\u0ab6\\u0A70\\u0a71\\u0a82\\u0a83\\u0ac3\\u0ac4\\u0ac5\\u0ac9\\u0a5c\\u0a72\\u0a73\\u0a74\\u0a8b\\u0a8d\\u0a91\\u0abd]", /*roundtrip exclusions*/
1463
1464    "GURMUKHI-GUJARATI", "[:GURMUKHI:]", "[:GUJARATI:]",
1465    "[\\u0a5c\\u0A70\\u0a71\\u0a72\\u0a73\\u0a74\\u0a82\\u0a83\\u0a8b\\u0a8c\\u0a8d\\u0a91\\u0ab3\\u0ab6\\u0ab7\\u0abd\\u0ac3\\u0ac4\\u0ac5\\u0ac9\\u0ad0\\u0ae0\\u0ae1]", /*roundtrip exclusions*/
1466
1467    "ORIYA-GURMUKHI", "[:ORIYA:]", "[:GURMUKHI:]",
1468    "[\\u0A01\\u0A02\\u0a5c\\u0a21\\u0a47\\u0a71\\u0b02\\u0b03\\u0b33\\u0b36\\u0b43\\u0b56\\u0b57\\u0B0B\\u0B0C\\u0B37\\u0B3D\\u0B5F\\u0B60\\u0B61\\u0a35\\u0a72\\u0a73\\u0a74]", /*roundtrip exclusions*/
1469
1470    "GURMUKHI-ORIYA", "[:GURMUKHI:]", "[:ORIYA:]",
1471    "[\\u0b01\\u0b02\\u0b03\\u0b33\\u0b36\\u0b43\\u0b56\\u0b57\\u0B0B\\u0B0C\\u0B37\\u0B3D\\u0B5F\\u0B60\\u0B61\\u0b70\\u0b71]", /*roundtrip exclusions*/
1472
1473    "TAMIL-GURMUKHI", "[:TAMIL:]", "[:GURMUKHI:]",
1474    "[\\u0A01\\u0A02\\u0a33\\u0a36\\u0a3c\\u0a70\\u0a71\\u0a47\\u0A16\\u0A17\\u0A18\\u0A1B\\u0A1D\\u0A20\\u0A21\\u0A22\\u0A25\\u0A26\\u0A27\\u0A2B\\u0A2C\\u0A2D\\u0A59\\u0A5A\\u0A5B\\u0A5C\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
1475
1476    "GURMUKHI-TAMIL", "[:GURMUKHI:]", "[:TAMIL:]",
1477    "[\\u0b82\\u0bc6\\u0bca\\u0bd7\\u0bb7\\u0bb3\\u0b83\\u0B8E\\u0B92\\u0BA9\\u0BB1\\u0BB4\\u0bb6\\u0BF0\\u0BF1\\u0BF2]", /*roundtrip exclusions*/
1478
1479    "TELUGU-GURMUKHI", "[:TELUGU:]", "[:GURMUKHI:]",
1480    "[\\u0A02\\u0a33\\u0a36\\u0a3c\\u0a70\\u0a71\\u0A59\\u0A5A\\u0A5B\\u0A5C\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
1481
1482    "GURMUKHI-TELUGU", "[:GURMUKHI:]", "[:TELUGU:]",
1483    "[\\u0c01\\u0c02\\u0c03\\u0c33\\u0c36\\u0c44\\u0c43\\u0c46\\u0c4a\\u0c56\\u0c55\\u0C0B\\u0C0C\\u0C0E\\u0C12\\u0C31\\u0C37\\u0C60\\u0C61]", /*roundtrip exclusions*/
1484
1485    "KANNADA-GURMUKHI", "[:KANNADA:]", "[:GURMUKHI:]",
1486    "[\\u0A01\\u0A02\\u0a33\\u0a36\\u0a3c\\u0a70\\u0a71\\u0A59\\u0A5A\\u0A5B\\u0A5C\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
1487
1488    "GURMUKHI-KANNADA", "[:GURMUKHI:]", "[:KANNADA:]",
1489    "[{\\u0cb0\\u0cbc}{\\u0cb3\\u0cbc}\\u0c82\\u0c83\\u0cb3\\u0cb6\\u0cc4\\u0cc3\\u0cc6\\u0cca\\u0cd5\\u0cd6\\u0C8B\\u0C8C\\u0C8E\\u0C92\\u0CB1\\u0CB7\\u0cbd\\u0CE0\\u0CE1\\u0cde]", /*roundtrip exclusions*/
1490
1491    "MALAYALAM-GURMUKHI", "[:MALAYALAM:]", "[:GURMUKHI:]",
1492    "[\\u0A01\\u0A02\\u0a4b\\u0a4c\\u0a33\\u0a36\\u0a3c\\u0a70\\u0a71\\u0A59\\u0A5A\\u0A5B\\u0A5C\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
1493
1494    "GURMUKHI-MALAYALAM", "[:GURMUKHI:]", "[:MALAYALAM:]",
1495    "[\\u0d02\\u0d03\\u0d33\\u0d36\\u0d43\\u0d46\\u0d4a\\u0d4c\\u0d57\\u0D0B\\u0D0C\\u0D0E\\u0D12\\u0D31\\u0D34\\u0D37\\u0D60\\u0D61]", /*roundtrip exclusions*/
1496
1497    "GUJARATI-ORIYA", "[:GUJARATI:]", "[:ORIYA:]",
1498    "[\\u0b56\\u0b57\\u0B0C\\u0B5F\\u0B61\\u0b70\\u0b71]", /*roundtrip exclusions*/
1499
1500    "ORIYA-GUJARATI", "[:ORIYA:]", "[:GUJARATI:]",
1501    "[\\u0Ac4\\u0Ac5\\u0Ac9\\u0Ac7\\u0A8D\\u0A91\\u0AB5\\u0Ad0]", /*roundtrip exclusions*/
1502
1503    "TAMIL-GUJARATI", "[:TAMIL:]", "[:GUJARATI:]",
1504    "[\\u0A81\\u0a8c\\u0abc\\u0ac3\\u0Ac4\\u0Ac5\\u0Ac9\\u0Ac7\\u0A8B\\u0A8D\\u0A91\\u0A96\\u0A97\\u0A98\\u0A9B\\u0A9D\\u0AA0\\u0AA1\\u0AA2\\u0AA5\\u0AA6\\u0AA7\\u0AAB\\u0AAC\\u0AAD\\u0AB6\\u0ABD\\u0AD0\\u0AE0\\u0AE1]", /*roundtrip exclusions*/
1505
1506    "GUJARATI-TAMIL", "[:GUJARATI:]", "[:TAMIL:]",
1507    "[\\u0Bc6\\u0Bca\\u0Bd7\\u0B8E\\u0B92\\u0BA9\\u0BB1\\u0BB4\\u0BF0\\u0BF1\\u0BF2]", /*roundtrip exclusions*/
1508
1509    "TELUGU-GUJARATI", "[:TELUGU:]", "[:GUJARATI:]",
1510    "[\\u0abc\\u0Ac5\\u0Ac9\\u0A8D\\u0A91\\u0ABD\\u0Ad0]", /*roundtrip exclusions*/
1511
1512    "GUJARATI-TELUGU", "[:GUJARATI:]", "[:TELUGU:]",
1513    "[\\u0c46\\u0c4a\\u0c55\\u0c56\\u0C0C\\u0C0E\\u0C12\\u0C31\\u0C61]", /*roundtrip exclusions*/
1514
1515    "KANNADA-GUJARATI", "[:KANNADA:]", "[:GUJARATI:]",
1516    "[\\u0A81\\u0abc\\u0Ac5\\u0Ac9\\u0A8D\\u0A91\\u0ABD\\u0Ad0]", /*roundtrip exclusions*/
1517
1518    "GUJARATI-KANNADA", "[:GUJARATI:]", "[:KANNADA:]",
1519    "[{\\u0cb0\\u0cbc}{\\u0cb3\\u0cbc}\\u0cc6\\u0cca\\u0cd5\\u0cd6\\u0C8C\\u0C8E\\u0C92\\u0CB1\\u0CDE\\u0CE1]", /*roundtrip exclusions*/
1520
1521    "MALAYALAM-GUJARATI", "[:MALAYALAM:]", "[:GUJARATI:]",
1522    "[\\u0A81\\u0ac4\\u0acb\\u0acc\\u0abc\\u0Ac5\\u0Ac9\\u0A8D\\u0A91\\u0ABD\\u0Ad0]", /*roundtrip exclusions*/
1523
1524    "GUJARATI-MALAYALAM", "[:GUJARATI:]", "[:MALAYALAM:]",
1525    "[\\u0d46\\u0d4a\\u0d4c\\u0d55\\u0d57\\u0D0C\\u0D0E\\u0D12\\u0D31\\u0D34\\u0D61]", /*roundtrip exclusions*/
1526
1527    "TAMIL-ORIYA", "[:TAMIL:]", "[:ORIYA:]",
1528    "[\\u0B01\\u0b3c\\u0b43\\u0b56\\u0B0B\\u0B0C\\u0B16\\u0B17\\u0B18\\u0B1B\\u0B1D\\u0B20\\u0B21\\u0B22\\u0B25\\u0B26\\u0B27\\u0B2B\\u0B2C\\u0B2D\\u0B36\\u0B3D\\u0B5C\\u0B5D\\u0B5F\\u0B60\\u0B61\\u0b70\\u0b71]", /*roundtrip exclusions*/
1529
1530    "ORIYA-TAMIL", "[:ORIYA:]", "[:TAMIL:]",
1531    "[\\u0bc6\\u0bca\\u0bc7\\u0B8E\\u0B92\\u0BA9\\u0BB1\\u0BB4\\u0BB5\\u0BF0\\u0BF1\\u0BF2]", /*roundtrip exclusions*/
1532
1533    "TELUGU-ORIYA", "[:TELUGU:]", "[:ORIYA:]",
1534    "[\\u0b3c\\u0b57\\u0b56\\u0B3D\\u0B5C\\u0B5D\\u0B5F\\u0b70\\u0b71]", /*roundtrip exclusions*/
1535
1536    "ORIYA-TELUGU", "[:ORIYA:]", "[:TELUGU:]",
1537    "[\\u0c44\\u0c46\\u0c4a\\u0c55\\u0c47\\u0C0E\\u0C12\\u0C31\\u0C35]", /*roundtrip exclusions*/
1538
1539    "KANNADA-ORIYA", "[:KANNADA:]", "[:ORIYA:]",
1540    "[\\u0B01\\u0b3c\\u0b57\\u0B3D\\u0B5C\\u0B5D\\u0B5F\\u0b70\\u0b71]", /*roundtrip exclusions*/
1541
1542    "ORIYA-KANNADA", "[:ORIYA:]", "[:KANNADA:]",
1543    "[{\\u0cb0\\u0cbc}{\\u0cb3\\u0cbc}\\u0cc4\\u0cc6\\u0cca\\u0cd5\\u0cc7\\u0C8E\\u0C92\\u0CB1\\u0CB5\\u0CDE]", /*roundtrip exclusions*/
1544
1545    "MALAYALAM-ORIYA", "[:MALAYALAM:]", "[:ORIYA:]",
1546    "[\\u0B01\\u0b3c\\u0b56\\u0B3D\\u0B5C\\u0B5D\\u0B5F\\u0b70\\u0b71]", /*roundtrip exclusions*/
1547
1548    "ORIYA-MALAYALAM", "[:ORIYA:]", "[:MALAYALAM:]",
1549    "[\\u0D47\\u0D46\\u0D4a\\u0D0E\\u0D12\\u0D31\\u0D34\\u0D35]", /*roundtrip exclusions*/
1550
1551    "TELUGU-TAMIL", "[:TELUGU:]", "[:TAMIL:]",
1552    "[\\u0bd7\\u0ba9\\u0bb4\\u0BF0\\u0BF1\\u0BF2]", /*roundtrip exclusions*/
1553
1554    "TAMIL-TELUGU", "[:TAMIL:]", "[:TELUGU:]",
1555    "[\\u0C01\\u0c43\\u0c44\\u0c46\\u0c47\\u0c55\\u0c56\\u0c66\\u0C0B\\u0C0C\\u0C16\\u0C17\\u0C18\\u0C1B\\u0C1D\\u0C20\\u0C21\\u0C22\\u0C25\\u0C26\\u0C27\\u0C2B\\u0C2C\\u0C2D\\u0C36\\u0C60\\u0C61]", /*roundtrip exclusions*/
1556
1557    "KANNADA-TAMIL", "[:KANNADA:]", "[:TAMIL:]",
1558    "[\\u0bd7\\u0bc6\\u0ba9\\u0bb4\\u0BF0\\u0BF1\\u0BF2]", /*roundtrip exclusions*/
1559
1560    "TAMIL-KANNADA", "[:TAMIL:]", "[:KANNADA:]",
1561    "[\\u0cc3\\u0cc4\\u0cc6\\u0cc7\\u0cd5\\u0cd6\\u0C8B\\u0C8C\\u0C96\\u0C97\\u0C98\\u0C9B\\u0C9D\\u0CA0\\u0CA1\\u0CA2\\u0CA5\\u0CA6\\u0CA7\\u0CAB\\u0CAC\\u0CAD\\u0CB6\\u0cbc\\u0cbd\\u0CDE\\u0CE0\\u0CE1]", /*roundtrip exclusions*/
1562
1563    "MALAYALAM-TAMIL", "[:MALAYALAM:]", "[:TAMIL:]",
1564    "[\\u0ba9\\u0BF0\\u0BF1\\u0BF2]", /*roundtrip exclusions*/
1565
1566    "TAMIL-MALAYALAM", "[:TAMIL:]", "[:MALAYALAM:]",
1567    "[\\u0d43\\u0d12\\u0D0B\\u0D0C\\u0D16\\u0D17\\u0D18\\u0D1B\\u0D1D\\u0D20\\u0D21\\u0D22\\u0D25\\u0D26\\u0D27\\u0D2B\\u0D2C\\u0D2D\\u0D36\\u0D60\\u0D61]", /*roundtrip exclusions*/
1568
1569    "KANNADA-TELUGU", "[:KANNADA:]", "[:TELUGU:]",
1570    "[\\u0C01\\u0c3f\\u0c46\\u0c48\\u0c4a]", /*roundtrip exclusions*/
1571
1572    "TELUGU-KANNADA", "[:TELUGU:]", "[:KANNADA:]",
1573    "[\\u0cc8\\u0cd5\\u0cd6\\u0cbc\\u0cbd\\u0CDE]", /*roundtrip exclusions*/
1574
1575    "MALAYALAM-TELUGU", "[:MALAYALAM:]", "[:TELUGU:]",
1576    "[\\u0C01\\u0c44\\u0c4a\\u0c4c\\u0c4b\\u0c55\\u0c56]", /*roundtrip exclusions*/
1577
1578    "TELUGU-MALAYALAM", "[:TELUGU:]", "[:MALAYALAM:]",
1579    "[\\u0d4c\\u0d57\\u0D34]", /*roundtrip exclusions*/
1580
1581    "MALAYALAM-KANNADA", "[:MALAYALAM:]", "[:KANNADA:]",
1582    "[\\u0cbc\\u0cbd\\u0cc4\\u0cc6\\u0cca\\u0ccc\\u0ccb\\u0cd5\\u0cd6\\u0cDe]", /*roundtrip exclusions*/
1583
1584    "KANNADA-MALAYALAM", "[:KANNADA:]", "[:MALAYALAM:]",
1585    "[\\u0d4c\\u0d57\\u0d46\\u0D34]", /*roundtrip exclusions*/
1586
1587    "Latin-Bengali",latinForIndic, "[[:Bengali:][\\u0964\\u0965]]",
1588    "[\\u0965\\u09f0-\\u09fa\\u09ce]" /*roundtrip exclusions*/ ,
1589
1590    "Latin-Gurmukhi", latinForIndic, "[[:Gurmukhi:][\\u0964\\u0965]]",
1591    "[\\u0a01\\u0965\\u0a02\\u0a72\\u0a73\\u0a74]" /*roundtrip exclusions*/,
1592
1593    "Latin-Gujarati",latinForIndic, "[[:Gujarati:][\\u0964\\u0965]]",
1594    "[\\u0965]" /*roundtrip exclusions*/,
1595
1596    "Latin-Oriya",latinForIndic, "[[:Oriya:][\\u0964\\u0965]]",
1597    "[\\u0965\\u0b70]" /*roundtrip exclusions*/,
1598
1599    "Latin-Tamil",latinForIndic, "[:Tamil:]",
1600    "[\\u0BF0\\u0BF1\\u0BF2]" /*roundtrip exclusions*/,
1601
1602    "Latin-Telugu",latinForIndic, "[:Telugu:]",
1603    NULL /*roundtrip exclusions*/,
1604
1605    "Latin-Kannada",latinForIndic, "[:Kannada:]",
1606    NULL /*roundtrip exclusions*/,
1607
1608    "Latin-Malayalam",latinForIndic, "[:Malayalam:]",
1609    NULL /*roundtrip exclusions*/
1610};
1611
1612void TransliteratorRoundTripTest::TestDebug(const char* name,const char fromSet[],
1613                                            const char* toSet,const char* exclusions){
1614
1615    RTTest test(name);
1616    Legal *legal = new LegalIndic();
1617    test.test(UnicodeString(fromSet,""),UnicodeString(toSet,""),exclusions,this,quick,legal);
1618}
1619
1620void TransliteratorRoundTripTest::TestInterIndic() {
1621    //TestDebug("Latin-Gurmukhi", latinForIndic, "[:Gurmukhi:]","[\\u0965\\u0a02\\u0a72\\u0a73\\u0a74]",TRUE);
1622    int32_t num = (int32_t)(sizeof(interIndicArray)/(INTER_INDIC_ARRAY_WIDTH*sizeof(char*)));
1623    if(quick){
1624        logln("Testing only 5 of %i. Skipping rest (use -e for exhaustive)",num);
1625        num = 5;
1626    }
1627    for(int i = 0; i < num;i++){
1628        RTTest test(interIndicArray[i*INTER_INDIC_ARRAY_WIDTH + 0]);
1629        Legal *legal = new LegalIndic();
1630        logln(UnicodeString("Stress testing ") + interIndicArray[i*INTER_INDIC_ARRAY_WIDTH + 0]);
1631      if( !logKnownIssue( "cldrbug:1911" ) ) {
1632        /* "full test" */
1633        // CLDR bug #1911: This test should be moved into CLDR.
1634        test.test(  interIndicArray[i*INTER_INDIC_ARRAY_WIDTH + 1],
1635                    interIndicArray[i*INTER_INDIC_ARRAY_WIDTH + 2],
1636                    interIndicArray[i*INTER_INDIC_ARRAY_WIDTH + 3], // roundtrip exclusions
1637                    this, quick, legal, 50);
1638      } else {
1639        // It is left in its current state as a regression test.
1640        // CLDR should test, and remove the age filter.
1641          /* regression test - ""temporary"" until CLDR#1911 is fixed */
1642        // start
1643        UnicodeString source("[");
1644        source.append(interIndicArray[i*INTER_INDIC_ARRAY_WIDTH + 1]);
1645        source.append(" & [:Age=4.1:]]");
1646        UnicodeString target("[");
1647        target.append(interIndicArray[i*INTER_INDIC_ARRAY_WIDTH + 2]);
1648        target.append(" & [:Age=4.1:]]");
1649        test.test(  source,
1650                    target,
1651                    interIndicArray[i*INTER_INDIC_ARRAY_WIDTH + 3], // roundtrip exclusions
1652                    this, quick, legal, 50);
1653        // end
1654        delete legal;
1655      }
1656    }
1657}
1658
1659// end indic tests ----------------------------------------------------------
1660
1661#endif /* #if !UCONFIG_NO_TRANSLITERATION */
1662