1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************
8************************************************************************
9*   Date        Name        Description
10*   02/28/2001  aliu        Creation
11*   03/01/2001  George      port to HP/UX
12************************************************************************/
13
14#include "unicode/utypes.h"
15
16#if !UCONFIG_NO_TRANSLITERATION
17
18#include "jamotest.h"
19#include "unicode/utypes.h"
20#include "unicode/translit.h"
21#include "cmemory.h"
22#include "cpdtrans.h"
23
24// SEP is the disambiguation separator used by Latin-Jamo and Jamo-Latin
25#define SEP "-"
26
27JamoTest::JamoTest()
28{
29    UParseError parseError;
30    UErrorCode status = U_ZERO_ERROR;
31    NAME_JAMO = Transliterator::createFromRules("Name-Jamo",
32                                            UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
33                                            UTRANS_FORWARD, parseError, status);
34
35    if (U_FAILURE(status)) {
36        delete NAME_JAMO;
37        NAME_JAMO = NULL;
38    }
39    status = U_ZERO_ERROR;
40    JAMO_NAME = Transliterator::createFromRules("Jamo-Name",
41                                            UnicodeString(JAMO_NAMES_RULES, -1, US_INV),
42                                            UTRANS_REVERSE, parseError, status);
43    if (U_FAILURE(status)) {
44        delete JAMO_NAME;
45        JAMO_NAME = NULL;
46    }
47}
48
49JamoTest::~JamoTest()
50{
51    delete NAME_JAMO;
52    delete JAMO_NAME;
53}
54
55void
56JamoTest::runIndexedTest(int32_t index, UBool exec,
57                         const char* &name, char* /*par*/) {
58    switch (index) {
59        TESTCASE(0,TestJamo);
60        TESTCASE(1,TestRealText);
61        TESTCASE(2,TestPiecemeal);
62        default: name = ""; break;
63    }
64}
65
66void
67JamoTest::TestJamo() {
68    UParseError parseError;
69    UErrorCode status = U_ZERO_ERROR;
70    Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
71
72    if (latinJamo == 0 || U_FAILURE(status)) {
73        dataerrln("FAIL: createInstance() returned 0 - %s", u_errorName(status));
74        return;
75    }
76
77    Transliterator* jamoLatin = latinJamo->createInverse(status);
78
79    if (jamoLatin == 0) {
80        delete latinJamo;
81        errln("FAIL: createInverse() returned 0");
82        return;
83    }
84
85    static const char* CASE[] = {
86        // Column 1 is the latin text L1 to be fed to Latin-Jamo
87        // to yield output J.
88
89        // Column 2 is expected value of J.  J is fed to
90        // Jamo-Latin to yield output L2.
91
92        // Column 3 is expected value of L2.  If the expected
93        // value of L2 is L1, then L2 is NULL.
94
95                // add tests for the update to fix problems where it didn't follow the standard
96                // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html
97                "gach", "(Gi)(A)(Cf)", NULL,
98                "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL,
99                "choe", "(Ci)(OE)", NULL,
100                "wo", "(IEUNG)(WEO)", NULL,
101                "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil",
102                "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum",
103                "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum",
104                "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae",
105                "gaga", "(Gi)(A)(Gi)(A)", NULL,
106                "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL,
107                "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL,
108                "gakka", "(Gi)(A)(GGi)(A)", NULL,
109                "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL,
110                "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL,
111                "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL,
112
113        "bab", "(Bi)(A)(Bf)", NULL,
114        "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu",
115        "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba",
116        "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu",
117        "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga",
118        //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga",
119        "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL,
120        "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL,
121        "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf)
122        "gga", "(Gi)(EU)(Gi)(A)", "geuga",
123        "bsa", "(Bi)(EU)(Si)(A)", "beusa",
124        "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu",
125        "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL,
126        "la", "(R)(A)", NULL,
127        "bs", "(Bi)(EU)(Sf)", "beus",
128        "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga",
129
130        // 'r' in a final position is treated like 'l'
131        "karka", "(Ki)(A)(L)(Ki)(A)", "kalka",
132    };
133
134    enum { CASE_length = UPRV_LENGTHOF(CASE) };
135
136    int32_t i;
137    for (i=0; i<CASE_length; i+=3) {
138        UnicodeString jamo = nameToJamo(CASE[i+1]);
139        if (CASE[i+2] == NULL) {
140            expect(*latinJamo, CASE[i], jamo, *jamoLatin);
141        } else {
142            // Handle case where round-trip is expected to fail
143            expect(*latinJamo, CASE[i], jamo);
144            expect(*jamoLatin, jamo, CASE[i+2]);
145        }
146    }
147
148    delete latinJamo;
149    delete jamoLatin;
150}
151
152/**
153 * Test various step-at-a-time transformation of hangul to jamo to
154 * latin and back.
155 */
156void JamoTest::TestPiecemeal(void) {
157    UnicodeString hangul; hangul.append((UChar)0xBC0F);
158    UnicodeString jamo = nameToJamo("(Mi)(I)(Cf)");
159    UnicodeString latin("mic");
160    UnicodeString latin2("mich");
161
162    Transliterator *t = NULL;
163    UErrorCode status = U_ZERO_ERROR;
164
165    t = Transliterator::createInstance("NFD", UTRANS_FORWARD, status); // was Hangul-Jamo
166    if (U_FAILURE(status) || t == 0) {
167        dataerrln("FAIL: createInstance failed");
168        return;
169    }
170    expect(*t, hangul, jamo);
171    delete t;
172
173    t = Transliterator::createInstance("NFC", UTRANS_FORWARD, status); // was Jamo-Hangul
174    if (U_FAILURE(status) || t == 0) {
175        errln("FAIL: createInstance failed");
176        return;
177    }
178    expect(*t, jamo, hangul);
179    delete t;
180
181    t = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, status);
182    if (U_FAILURE(status) || t == 0) {
183        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
184        return;
185    }
186    expect(*t, latin, jamo);
187    delete t;
188
189    t = Transliterator::createInstance("Jamo-Latin", UTRANS_FORWARD, status);
190    if (U_FAILURE(status) || t == 0) {
191        errln("FAIL: createInstance failed");
192        return;
193    }
194    expect(*t, jamo, latin2);
195    delete t;
196
197    t = Transliterator::createInstance("Hangul-Latin", UTRANS_FORWARD, status);
198    if (U_FAILURE(status) || t == 0) {
199        errln("FAIL: createInstance failed");
200        return;
201    }
202    expect(*t, hangul, latin2);
203    delete t;
204
205    t = Transliterator::createInstance("Latin-Hangul", UTRANS_FORWARD, status);
206    if (U_FAILURE(status) || t == 0) {
207        errln("FAIL: createInstance failed");
208        return;
209    }
210    expect(*t, latin, hangul);
211    delete t;
212
213    t = Transliterator::createInstance("Hangul-Latin; Latin-Jamo", UTRANS_FORWARD, status);
214    if (U_FAILURE(status) || t == 0) {
215        errln("FAIL: createInstance failed");
216        return;
217    }
218    expect(*t, hangul, jamo);
219    delete t;
220
221    t = Transliterator::createInstance("Jamo-Latin; Latin-Hangul", UTRANS_FORWARD, status);
222    if (U_FAILURE(status) || t == 0) {
223        errln("FAIL: createInstance failed");
224        return;
225    }
226    expect(*t, jamo, hangul);
227    delete t;
228
229    t = Transliterator::createInstance("Hangul-Latin; Latin-Hangul", UTRANS_FORWARD, status);
230    if (U_FAILURE(status) || t == 0) {
231        errln("FAIL: createInstance failed");
232        return;
233    }
234    expect(*t, hangul, hangul);
235    delete t;
236}
237
238void
239JamoTest::TestRealText() {
240    // Test text taken from the Unicode web site
241     static const char* const WHAT_IS_UNICODE[] = {
242      "\\uc720\\ub2c8\\ucf54\\ub4dc\\uc5d0", "\\ub300\\ud574", "?",
243
244      "\\uc5b4\\ub5a4", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b4\\ub5a4",
245      "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b4\\ub5a4", "\\uc5b8\\uc5b4\\uc5d0\\ub3c4",
246      "\\uc0c1\\uad00\\uc5c6\\uc774", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\ubaa8\\ub4e0",
247      "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uace0\\uc720", "\\ubc88\\ud638\\ub97c",
248      "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.",
249
250      "\\uae30\\ubcf8\\uc801\\uc73c\\ub85c", "\\ucef4\\ud4e8\\ud130\\ub294",
251      "\\uc22b\\uc790\\ub9cc", "\\ucc98\\ub9ac\\ud569\\ub2c8\\ub2e4.", "\\uae00\\uc790\\ub098",
252      "\\ub2e4\\ub978", "\\ubb38\\uc790\\uc5d0\\ub3c4", "\\uc22b\\uc790\\ub97c",
253      "\\uc9c0\\uc815\\ud558\\uc5ec",
254      "\\uc800\\uc7a5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\uac00",
255      "\\uac1c\\ubc1c\\ub418\\uae30", "\\uc804\\uc5d0\\ub294", "\\uc774\\ub7ec\\ud55c",
256      "\\uc22b\\uc790\\ub97c", "\\uc9c0\\uc815\\ud558\\uae30", "\\uc704\\ud574", "\\uc218\\ubc31",
257      "\\uac00\\uc9c0\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654",
258      "\\uc2dc\\uc2a4\\ud15c\\uc744",
259      "\\uc0ac\\uc6a9\\ud588\\uc2b5\\ub2c8\\ub2e4.", "\\ub2e8\\uc77c", "\\uae30\\ud638\\ud654",
260      "\\ubc29\\ubc95\\uc73c\\ub85c\\ub294", "\\ubaa8\\ub4e0", "\\ubb38\\uc790\\ub97c",
261      "\\ud3ec\\ud568\\ud560", "\\uc218", "\\uc5c6\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc608\\ub97c",
262      "\\ub4e4\\uc5b4", "\\uc720\\ub7fd", "\\uc5f0\\ud569\\uc5d0\\uc11c\\ub9cc",
263      "\\ubcf4\\ub354\\ub77c\\ub3c4", "\\ubaa8\\ub4e0", "\\uac01", "\\ub098\\ub77c\\ubcc4",
264      "\\uc5b8\\uc5b4\\ub97c", "\\ucc98\\ub9ac\\ud558\\ub824\\uba74", "\\uc5ec\\ub7ec",
265      "\\uac1c\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774",
266      "\\ud544\\uc694\\ud569\\ub2c8\\ub2e4.", "\\uc601\\uc5b4\\uc640", "\\uac19\\uc740",
267      "\\ub2e8\\uc77c", "\\uc5b8\\uc5b4\\uc758", "\\uacbd\\uc6b0\\ub3c4",
268      "\\uacf5\\ud1b5\\uc801\\uc73c\\ub85c", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
269      "\\uae00\\uc790,", "\\ubb38\\uc7a5", "\\ubd80\\ud638", "\\ubc0f",
270      "\\ud14c\\ud06c\\ub2c8\\uceec", "\\uae30\\ud638\\uc5d0", "\\ub9de\\ub294", "\\ub2e8\\uc77c",
271      "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc744", "\\uac16\\uace0", "\\uc788\\uc9c0",
272      "\\ubabb\\ud558\\uc600\\uc2b5\\ub2c8\\ub2e4.",
273
274      "\\uc774\\ub7ec\\ud55c", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uc740",
275      "\\ub610\\ud55c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uacfc",
276      "\\ucda9\\ub3cc\\ud569\\ub2c8\\ub2e4.", "\\uc989", "\\ub450", "\\uac00\\uc9c0",
277      "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", "\\ub450", "\\uac1c\\uc758", "\\ub2e4\\ub978",
278      "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uac19\\uc740", "\\ubc88\\ud638\\ub97c",
279      "\\uc0ac\\uc6a9\\ud558\\uac70\\ub098", "\\uac19\\uc740", "\\ubb38\\uc790\\uc5d0",
280      "\\ub300\\ud574", "\\ub2e4\\ub978", "\\ubc88\\ud638\\ub97c", "\\uc0ac\\uc6a9\\ud560", "\\uc218",
281      "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc8fc\\uc5b4\\uc9c4", "\\ubaa8\\ub4e0",
282      "\\ucef4\\ud4e8\\ud130(\\ud2b9\\ud788", "\\uc11c\\ubc84)\\ub294", "\\uc11c\\ub85c",
283      "\\ub2e4\\ub978", "\\uc5ec\\ub7ec", "\\uac00\\uc9c0", "\\uae30\\ud638\\ud654",
284      "\\ubc29\\ubc95\\uc744", "\\uc9c0\\uc6d0\\ud574\\uc57c",
285      "\\ud569\\ub2c8\\ub2e4.", "\\uadf8\\ub7ec\\ub098,", "\\ub370\\uc774\\ud130\\ub97c",
286      "\\uc11c\\ub85c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774\\ub098",
287      "\\ud50c\\ub7ab\\ud3fc", "\\uac04\\uc5d0", "\\uc804\\ub2ec\\ud560", "\\ub54c\\ub9c8\\ub2e4",
288      "\\uadf8", "\\ub370\\uc774\\ud130\\ub294", "\\ud56d\\uc0c1", "\\uc190\\uc0c1\\uc758",
289      "\\uc704\\ud5d8\\uc744", "\\uacaa\\uac8c", "\\ub429\\ub2c8\\ub2e4.",
290
291      "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub85c", "\\ubaa8\\ub4e0", "\\uac83\\uc744",
292      "\\ud574\\uacb0\\ud560", "\\uc218", "\\uc788\\uc2b5\\ub2c8\\ub2e4!",
293      "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\uc0ac\\uc6a9", "\\uc911\\uc778",
294      "\\ud50c\\ub7ab\\ud3fc,", "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b8\\uc5b4\\uc5d0",
295      "\\uad00\\uacc4\\uc5c6\\uc774", "\\ubb38\\uc790\\ub9c8\\ub2e4", "\\uace0\\uc720\\ud55c",
296      "\\uc22b\\uc790\\ub97c",
297      "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
298      "\\ud45c\\uc900\\uc740", // "Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, "
299      // "Sun, Sybase, Unisys "
300      "\\ubc0f", "\\uae30\\ud0c0", "\\uc5ec\\ub7ec",
301      "\\ud68c\\uc0ac\\uc640", "\\uac19\\uc740", "\\uc5c5\\uacc4",
302      "\\uc120\\ub450\\uc8fc\\uc790\\uc5d0", "\\uc758\\ud574",
303      "\\ucc44\\ud0dd\\ub418\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294",
304      // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML "
305      "\\ub4f1\\uacfc",
306      "\\uac19\\uc774", "\\ud604\\uc7ac", "\\ub110\\ub9ac", "\\uc0ac\\uc6a9\\ub418\\ub294",
307      "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud544\\uc694\\ud558\\uba70", "\\uc774\\ub294", // "ISO/IEC",
308      "10646\\uc744", "\\uad6c\\ud604\\ud558\\ub294", "\\uacf5\\uc2dd\\uc801\\uc778",
309      "\\ubc29\\ubc95\\uc785\\ub2c8\\ub2e4.", "\\uc774\\ub294", "\\ub9ce\\uc740", "\\uc6b4\\uc601",
310      "\\uccb4\\uc81c,", "\\uc694\\uc998", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0",
311      "\\ube0c\\ub77c\\uc6b0\\uc800", "\\ubc0f", "\\uae30\\ud0c0", "\\ub9ce\\uc740",
312      "\\uc81c\\ud488\\uc5d0\\uc11c",
313      "\\uc9c0\\uc6d0\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
314      "\\ud45c\\uc900\\uc758", "\\ubd80\\uc0c1\\uacfc", "\\uc774\\ub97c",
315      "\\uc9c0\\uc6d0\\ud558\\ub294", "\\ub3c4\\uad6c\\uc758", "\\uac00\\uc6a9\\uc131\\uc740",
316      "\\ucd5c\\uadfc", "\\uc804", "\\uc138\\uacc4\\uc5d0", "\\ubd88\\uace0", "\\uc788\\ub294",
317      "\\uae30\\uc220", "\\uacbd\\ud5a5\\uc5d0\\uc11c", "\\uac00\\uc7a5", "\\uc911\\uc694\\ud55c",
318      "\\ubd80\\ubd84\\uc744", "\\ucc28\\uc9c0\\ud558\\uace0", "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
319
320      "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
321      // Replaced a hyphen with a space to make the test case work with CLDR1.5
322      //"\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8-\\uc11c\\ubc84", "\\ub610\\ub294",
323      "\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8 \\uc11c\\ubc84", "\\ub610\\ub294",
324      // Replaced a hyphen with a space.
325      //"\\ub2e4\\uc911-\\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
326      "\\ub2e4\\uc911 \\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc",
327      "\\uc6f9", "\\uc0ac\\uc774\\ud2b8\\uc5d0", "\\ud1b5\\ud569\\ud558\\uba74",
328      "\\ub808\\uac70\\uc2dc", "\\ubb38\\uc790", "\\uc138\\ud2b8", "\\uc0ac\\uc6a9\\uc5d0",
329      "\\uc788\\uc5b4\\uc11c", "\\uc0c1\\ub2f9\\ud55c", "\\ube44\\uc6a9", "\\uc808\\uac10",
330      "\\ud6a8\\uacfc\\uac00",
331      "\\ub098\\ud0c0\\ub0a9\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c",
332      "\\ud1b5\\ud574", "\\ub9ac\\uc5d4\\uc9c0\\ub2c8\\uc5b4\\ub9c1", "\\uc5c6\\uc774",
333      "\\ub2e4\\uc911", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b8\\uc5b4", "\\ubc0f", "\\uad6d\\uac00",
334      "\\uac04\\uc5d0", "\\ub2e8\\uc77c", "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4",
335      "\\ud50c\\ub7ab\\ud3fc", "\\ub610\\ub294", "\\ub2e8\\uc77c", "\\uc6f9",
336      "\\uc0ac\\uc774\\ud2b8\\ub97c", "\\ubaa9\\ud45c\\ub85c", "\\uc0bc\\uc744", "\\uc218",
337      "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc774\\ub97c", "\\uc0ac\\uc6a9\\ud558\\uba74",
338      "\\ub370\\uc774\\ud130\\ub97c", "\\uc190\\uc0c1", "\\uc5c6\\uc774", "\\uc5ec\\ub7ec",
339      "\\uc2dc\\uc2a4\\ud15c\\uc744", "\\ud1b5\\ud574", "\\uc804\\uc1a1\\ud560", "\\uc218",
340      "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
341
342      "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc5d0", "\\ub300\\ud574",
343      "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc740",
344      "\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300",
345      "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc",
346      "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744",
347      "\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758",
348      "\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70",
349      "\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574",
350      "\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4",
351      "\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4",
352      "\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294",
353      "\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758",
354      "\\ubc94\\uc704\\ub97c",
355      "\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758",
356      "\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0",
357      "\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
358      "\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740",
359      "\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098",
360      "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0",
361      "\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744",
362      "\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc",
363      "\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4",
364      "\\uc788\\uc2b5\\ub2c8\\ub2e4.",
365
366      "\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,",
367      "\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5",
368      "\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0",
369      "\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c",
370      "\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624."
371    };
372
373    enum { WHAT_IS_UNICODE_length = UPRV_LENGTHOF(WHAT_IS_UNICODE) };
374
375    UParseError parseError;
376    UErrorCode status = U_ZERO_ERROR;
377    Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
378    Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status);
379    if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) {
380        delete latinJamo;
381        delete jamoHangul;
382        dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
383        return;
384    }
385    Transliterator* jamoLatin = latinJamo->createInverse(status);
386    Transliterator* hangulJamo = jamoHangul->createInverse(status);
387    if (jamoLatin == 0 || hangulJamo == 0) {
388        errln("FAIL: createInverse returned NULL");
389        delete latinJamo;
390        delete jamoLatin;
391        delete jamoHangul;
392        delete hangulJamo;
393        return;
394    }
395
396    Transliterator* tarray[4] =
397        { hangulJamo, jamoLatin, latinJamo, jamoHangul };
398    CompoundTransliterator rt(tarray, 4);
399
400    UnicodeString buf;
401    int32_t total = 0;
402    int32_t errors = 0;
403    int32_t i;
404    for (i=0; i < WHAT_IS_UNICODE_length; ++i) {
405        ++total;
406        UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV);
407        hangul = hangul.unescape(); // Parse backslash-u escapes
408        UnicodeString hangulX = hangul;
409        rt.transliterate(hangulX);
410        if (hangul != hangulX) {
411            ++errors;
412            UnicodeString jamo = hangul; hangulJamo->transliterate(jamo);
413            UnicodeString latin = jamo; jamoLatin->transliterate(latin);
414            UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2);
415            UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2);
416
417            buf.remove(0);
418            buf.append("FAIL: ");
419            if (hangul2 != hangulX) {
420                buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")");
421            }
422            // The Hangul-Jamo conversion is not usually the
423            // bug here, so we hide it from display.
424            // Uncomment lines to see the Hangul.
425            buf.append(//hangul + " => " +
426                       jamoToName(jamo) + " => " +
427                       latin + " => " + jamoToName(jamo2)
428                       //+ " => " + hangul2
429                       );
430            errln(prettify(buf));
431        }
432    }
433    if (errors != 0) {
434        errln((UnicodeString)"Test word failures: " + errors + " out of " + total);
435    } else {
436        logln((UnicodeString)"All " + total + " test words passed");
437    }
438
439    delete latinJamo;
440    delete jamoLatin;
441    delete jamoHangul;
442    delete hangulJamo;
443}
444
445// Override TransliteratorTest
446void
447JamoTest::expectAux(const UnicodeString& tag,
448                    const UnicodeString& summary, UBool pass,
449                    const UnicodeString& expectedResult) {
450    UnicodeString jsum = jamoToName(summary);
451    UnicodeString jexp = jamoToName(expectedResult);
452    TransliteratorTest::expectAux(tag, jsum, pass, jexp);
453}
454
455const char* JamoTest::JAMO_NAMES_RULES =
456        "'(Gi)' <> \\u1100;"
457        "'(GGi)' <> \\u1101;"
458        "'(Ni)' <> \\u1102;"
459        "'(Di)' <> \\u1103;"
460        "'(DD)' <> \\u1104;"
461        "'(R)' <> \\u1105;"
462        "'(Mi)' <> \\u1106;"
463        "'(Bi)' <> \\u1107;"
464        "'(BB)' <> \\u1108;"
465        "'(Si)' <> \\u1109;"
466        "'(SSi)' <> \\u110A;"
467        "'(IEUNG)' <> \\u110B;"
468        "'(Ji)' <> \\u110C;"
469        "'(JJ)' <> \\u110D;"
470        "'(Ci)' <> \\u110E;"
471        "'(Ki)' <> \\u110F;"
472        "'(Ti)' <> \\u1110;"
473        "'(Pi)' <> \\u1111;"
474        "'(Hi)' <> \\u1112;"
475
476        "'(A)' <> \\u1161;"
477        "'(AE)' <> \\u1162;"
478        "'(YA)' <> \\u1163;"
479        "'(YAE)' <> \\u1164;"
480        "'(EO)' <> \\u1165;"
481        "'(E)' <> \\u1166;"
482        "'(YEO)' <> \\u1167;"
483        "'(YE)' <> \\u1168;"
484        "'(O)' <> \\u1169;"
485        "'(WA)' <> \\u116A;"
486        "'(WAE)' <> \\u116B;"
487        "'(OE)' <> \\u116C;"
488        "'(YO)' <> \\u116D;"
489        "'(U)' <> \\u116E;"
490        "'(WEO)' <> \\u116F;"
491        "'(WE)' <> \\u1170;"
492        "'(WI)' <> \\u1171;"
493        "'(YU)' <> \\u1172;"
494        "'(EU)' <> \\u1173;"
495        "'(YI)' <> \\u1174;"
496        "'(I)' <> \\u1175;"
497
498        "'(Gf)' <> \\u11A8;"
499        "'(GGf)' <> \\u11A9;"
500        "'(GS)' <> \\u11AA;"
501        "'(Nf)' <> \\u11AB;"
502        "'(NJ)' <> \\u11AC;"
503        "'(NH)' <> \\u11AD;"
504        "'(Df)' <> \\u11AE;"
505        "'(L)' <> \\u11AF;"
506        "'(LG)' <> \\u11B0;"
507        "'(LM)' <> \\u11B1;"
508        "'(LB)' <> \\u11B2;"
509        "'(LS)' <> \\u11B3;"
510        "'(LT)' <> \\u11B4;"
511        "'(LP)' <> \\u11B5;"
512        "'(LH)' <> \\u11B6;"
513        "'(Mf)' <> \\u11B7;"
514        "'(Bf)' <> \\u11B8;"
515        "'(BS)' <> \\u11B9;"
516        "'(Sf)' <> \\u11BA;"
517        "'(SSf)' <> \\u11BB;"
518        "'(NG)' <> \\u11BC;"
519        "'(Jf)' <> \\u11BD;"
520        "'(Cf)' <> \\u11BE;"
521        "'(Kf)' <> \\u11BF;"
522        "'(Tf)' <> \\u11C0;"
523        "'(Pf)' <> \\u11C1;"
524        "'(Hf)' <> \\u11C2;";
525
526/**
527 * Convert short names to actual jamo.  E.g., "x(LG)y" returns
528 * "x\u11B0y".  See JAMO_NAMES for table of names.
529 */
530UnicodeString
531JamoTest::nameToJamo(const UnicodeString& input) {
532    if (NAME_JAMO == 0) {
533        errln("Failed to create NAME_JAMO");
534        return input;   /* failure! */
535    }
536    UnicodeString result(input);
537    NAME_JAMO->transliterate(result);
538    return result;
539}
540
541/**
542 * Convert jamo to short names.  E.g., "x\u11B0y" returns
543 * "x(LG)y".  See JAMO_NAMES for table of names.
544 */
545UnicodeString
546JamoTest::jamoToName(const UnicodeString& input) {
547    if (NAME_JAMO == 0) {
548        errln("Failed to create NAME_JAMO");
549        return input;   /* failure! */
550    }
551    UnicodeString result(input);
552    JAMO_NAME->transliterate(result);
553    return result;
554}
555
556#endif /* #if !UCONFIG_NO_TRANSLITERATION */
557