1/********************************************************************
2 * Copyright (c) 1997-2015, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************/
5
6#include <string.h>
7#include "unicode/utypes.h"
8#include "unicode/uscript.h"
9#include "unicode/uchar.h"
10#include "cintltst.h"
11#include "cucdapi.h"
12#include "cmemory.h"
13
14static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
15    int32_t i;
16    if(length == 0) {
17        strcpy(s, "(no scripts)");
18        return;
19    }
20    s[0] = 0;
21    for(i = 0; i < length; ++i) {
22        if(i > 0) {
23            strcat(s, " ");
24        }
25        strcat(s, uscript_getShortName(scripts[i]));
26    }
27}
28
29static void assertEqualScripts(const char *msg,
30                               const UScriptCode scripts1[], int32_t length1,
31                               const UScriptCode scripts2[], int32_t length2,
32                               UErrorCode errorCode) {
33    char s1[80];
34    char s2[80];
35    if(U_FAILURE(errorCode)) {
36        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
37        return;
38    }
39    scriptsToString(scripts1, length1, s1);
40    scriptsToString(scripts2, length2, s2);
41    if(0!=strcmp(s1, s2)) {
42        log_data_err("Failed: %s: expected %s but got %s\n", msg, s1, s2);
43    }
44}
45
46void TestUScriptCodeAPI(){
47    int i =0;
48    int numErrors =0;
49    {
50        const char* testNames[]={
51        /* test locale */
52        "en", "en_US", "sr", "ta" , "te_IN",
53        "hi", "he", "ar",
54        /* test abbr */
55        "Hani", "Hang","Hebr","Hira",
56        "Knda","Kana","Khmr","Lao",
57        "Latn",/*"Latf","Latg",*/
58        "Mlym", "Mong",
59
60        /* test names */
61        "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
62        "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
63        /* test lower case names */
64        "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
65        "oriya",     "runic",     "sinhala", "syriac","tamil",
66        "telugu",    "thaana",    "thai",    "tibetan",
67        /* test the bounds*/
68        "tagb", "arabic",
69        /* test bogus */
70        "asfdasd", "5464", "12235",
71        /* test the last index */
72        "zyyy", "YI",
73        NULL
74        };
75        UScriptCode expected[] ={
76            /* locales should return */
77            USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
78            USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
79            /* abbr should return */
80            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
81            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
82            USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
83            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
84            /* names should return */
85            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
86            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
87            /* lower case names should return */
88            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
89            USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
90            USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
91            /* bounds */
92            USCRIPT_TAGBANWA, USCRIPT_ARABIC,
93            /* bogus names should return invalid code */
94            USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
95            USCRIPT_COMMON, USCRIPT_YI,
96        };
97
98        UErrorCode err = U_ZERO_ERROR;
99
100        const int32_t capacity = 10;
101
102        for( ; testNames[i]!=NULL; i++){
103            UScriptCode script[10]={USCRIPT_INVALID_CODE};
104            uscript_getCode(testNames[i],script,capacity, &err);
105            if( script[0] != expected[i]){
106                   log_data_err("Error getting script code Got: %i  Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
107                       script[0],expected[i],testNames[i]);
108                   numErrors++;
109            }
110        }
111        if(numErrors >0 ){
112            log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
113        }
114    }
115
116    {
117        UErrorCode err = U_ZERO_ERROR;
118        int32_t capacity=0;
119        int32_t j;
120        UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
121        UScriptCode script[10]={USCRIPT_INVALID_CODE};
122        int32_t num = uscript_getCode("ja",script,capacity, &err);
123        /* preflight */
124        if(err==U_BUFFER_OVERFLOW_ERROR){
125            err = U_ZERO_ERROR;
126            capacity = 10;
127            num = uscript_getCode("ja",script,capacity, &err);
128            if(num!=(sizeof(jaCode)/sizeof(UScriptCode))){
129                log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
130                        num, (sizeof(jaCode)/sizeof(UScriptCode)));
131            }
132            for(j=0;j<sizeof(jaCode)/sizeof(UScriptCode);j++) {
133                if(script[j]!=jaCode[j]) {
134                    log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
135                            script[j], uscript_getName(script[j]),
136                            jaCode[j], uscript_getName(jaCode[j]));
137
138                }
139            }
140        }else{
141            log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
142                "U_BUFFER_OVERFLOW_ERROR",
143                 u_errorName(err));
144        }
145
146    }
147    {
148        static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
149        static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
150        static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
151        static const UScriptCode HAN[1] = { USCRIPT_HAN };
152        static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
153        static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
154        static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
155        UScriptCode scripts[5];
156        UErrorCode err;
157        int32_t num;
158
159        // Should work regardless of whether we have locale data for the language.
160        err = U_ZERO_ERROR;
161        num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
162        assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err);  // Tajik
163        err = U_ZERO_ERROR;
164        num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
165        assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err);  // Sherpa
166
167        // Multi-script languages.
168        err = U_ZERO_ERROR;
169        num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
170        assertEqualScripts("ja scripts: Kana Hira Hani",
171                           JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
172        err = U_ZERO_ERROR;
173        num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
174        assertEqualScripts("ko scripts: Hang Hani",
175                           KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
176        err = U_ZERO_ERROR;
177        num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
178        assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
179        err = U_ZERO_ERROR;
180        num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
181        assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
182        err = U_ZERO_ERROR;
183        num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
184        assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
185
186        // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
187        err = U_ZERO_ERROR;
188        num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
189        assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
190    }
191
192    {
193        UScriptCode testAbbr[]={
194            /* names should return */
195            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
196            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
197        };
198
199        const char* expectedNames[]={
200
201            /* test names */
202            "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
203            "Gothic",  "Greek",  "Gujarati",
204             NULL
205        };
206        i=0;
207        while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
208            const char* name = uscript_getName(testAbbr[i]);
209             if(name == NULL) {
210               log_data_err("Couldn't get script name\n");
211               return;
212             }
213            numErrors=0;
214            if(strcmp(expectedNames[i],name)!=0){
215                log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
216                numErrors++;
217            }
218            if(numErrors > 0){
219                if(numErrors >0 ){
220                    log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
221                }
222            }
223            i++;
224        }
225
226    }
227
228    {
229        UScriptCode testAbbr[]={
230            /* abbr should return */
231            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
232            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
233            USCRIPT_LATIN,
234            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
235        };
236
237        const char* expectedAbbr[]={
238              /* test abbr */
239            "Hani", "Hang","Hebr","Hira",
240            "Knda","Kana","Khmr","Laoo",
241            "Latn",
242            "Mlym", "Mong",
243             NULL
244        };
245        i=0;
246        while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
247            const char* name = uscript_getShortName(testAbbr[i]);
248            numErrors=0;
249            if(strcmp(expectedAbbr[i],name)!=0){
250                log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
251                numErrors++;
252            }
253            if(numErrors > 0){
254                if(numErrors >0 ){
255                    log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
256                }
257            }
258            i++;
259        }
260
261    }
262    /* now test uscript_getScript() API */
263    {
264        uint32_t codepoints[] = {
265                0x0000FF9D, /* USCRIPT_KATAKANA*/
266                0x0000FFBE, /* USCRIPT_HANGUL*/
267                0x0000FFC7, /* USCRIPT_HANGUL*/
268                0x0000FFCF, /* USCRIPT_HANGUL*/
269                0x0000FFD7, /* USCRIPT_HANGUL*/
270                0x0000FFDC, /* USCRIPT_HANGUL*/
271                0x00010300, /* USCRIPT_OLD_ITALIC*/
272                0x00010330, /* USCRIPT_GOTHIC*/
273                0x0001034A, /* USCRIPT_GOTHIC*/
274                0x00010400, /* USCRIPT_DESERET*/
275                0x00010428, /* USCRIPT_DESERET*/
276                0x0001D167, /* USCRIPT_INHERITED*/
277                0x0001D17B, /* USCRIPT_INHERITED*/
278                0x0001D185, /* USCRIPT_INHERITED*/
279                0x0001D1AA, /* USCRIPT_INHERITED*/
280                0x00020000, /* USCRIPT_HAN*/
281                0x00000D02, /* USCRIPT_MALAYALAM*/
282                0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
283                0x00000000, /* USCRIPT_COMMON*/
284                0x0001D169, /* USCRIPT_INHERITED*/
285                0x0001D182, /* USCRIPT_INHERITED*/
286                0x0001D18B, /* USCRIPT_INHERITED*/
287                0x0001D1AD, /* USCRIPT_INHERITED*/
288        };
289
290        UScriptCode expected[] = {
291                USCRIPT_KATAKANA ,
292                USCRIPT_HANGUL ,
293                USCRIPT_HANGUL ,
294                USCRIPT_HANGUL ,
295                USCRIPT_HANGUL ,
296                USCRIPT_HANGUL ,
297                USCRIPT_OLD_ITALIC,
298                USCRIPT_GOTHIC ,
299                USCRIPT_GOTHIC ,
300                USCRIPT_DESERET ,
301                USCRIPT_DESERET ,
302                USCRIPT_INHERITED,
303                USCRIPT_INHERITED,
304                USCRIPT_INHERITED,
305                USCRIPT_INHERITED,
306                USCRIPT_HAN ,
307                USCRIPT_MALAYALAM,
308                USCRIPT_UNKNOWN,
309                USCRIPT_COMMON,
310                USCRIPT_INHERITED ,
311                USCRIPT_INHERITED ,
312                USCRIPT_INHERITED ,
313                USCRIPT_INHERITED ,
314        };
315        UScriptCode code = USCRIPT_INVALID_CODE;
316        UErrorCode status = U_ZERO_ERROR;
317        UBool passed = TRUE;
318
319        for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
320            code = uscript_getScript(codepoints[i],&status);
321            if(U_SUCCESS(status)){
322                if( code != expected[i] ||
323                    code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
324                ) {
325                    log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
326                    passed = FALSE;
327                }
328            }else{
329                log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
330                         codepoints[i],u_errorName(status));
331                break;
332            }
333        }
334
335        if(passed==FALSE){
336           log_err("uscript_getScript failed.\n");
337        }
338    }
339    {
340        UScriptCode code= USCRIPT_INVALID_CODE;
341        UErrorCode  status = U_ZERO_ERROR;
342        code = uscript_getScript(0x001D169,&status);
343        if(code != USCRIPT_INHERITED){
344            log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
345        }
346    }
347    {
348        UScriptCode code= USCRIPT_INVALID_CODE;
349        UErrorCode  status = U_ZERO_ERROR;
350        int32_t err = 0;
351
352        for(i = 0; i<=0x10ffff; i++){
353            code =  uscript_getScript(i,&status);
354            if(code == USCRIPT_INVALID_CODE){
355                err++;
356                log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
357            }
358        }
359        if(err>0){
360            log_err("uscript_getScript failed for %d codepoints\n", err);
361        }
362    }
363    {
364        for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
365            const char* name = uscript_getName((UScriptCode)i);
366            if(name==NULL || strcmp(name,"")==0){
367                log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
368            }
369        }
370    }
371
372    {
373        /*
374         * These script codes were originally added to ICU pre-3.6, so that ICU would
375         * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
376         * These script codes were added with only short names because we don't
377         * want to invent long names ourselves.
378         * Unicode 5 and later encode some of these scripts and give them long names.
379         * Whenever this happens, the long script names here need to be updated.
380         */
381        static const char* expectedLong[] = {
382            "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
383            "Egyd", "Egyh", "Egyptian_Hieroglyphs",
384            "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
385            "Javanese", "Kayah_Li", "Latf", "Latg",
386            "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
387            "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
388            "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
389            "Zxxx", "Unknown",
390            "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
391            "Moon", "Meetei_Mayek",
392            /* new in ICU 4.0 */
393            "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
394            "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
395            "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
396            "Zmth", "Zsym",
397            /* new in ICU 4.4 */
398            "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
399            /* new in ICU 4.6 */
400            "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
401            "Loma", "Mende_Kikakui", "Meroitic_Cursive",
402            "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
403            /* new in ICU 4.8 */
404            "Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
405            /* new in ICU 49 */
406            "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
407            /* new in ICU 52 */
408            "Caucasian_Albanian", "Mahajani",
409            /* new in ICU 54 */
410            "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham"
411        };
412        static const char* expectedShort[] = {
413            "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
414            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
415            "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
416            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
417            "Zxxx", "Zzzz",
418            "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
419            "Moon", "Mtei",
420            /* new in ICU 4.0 */
421            "Armi", "Avst", "Cakm", "Kore",
422            "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
423            "Zmth", "Zsym",
424            /* new in ICU 4.4 */
425            "Bamu", "Lisu", "Nkgb", "Sarb",
426            /* new in ICU 4.6 */
427            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
428            "Narb", "Nbat", "Palm", "Sind", "Wara",
429            /* new in ICU 4.8 */
430            "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
431            /* new in ICU 49 */
432            "Hluw", "Khoj", "Tirh",
433            /* new in ICU 52 */
434            "Aghb", "Mahj",
435            /* new in ICU 54 */
436            "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd"
437        };
438        int32_t j = 0;
439        if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
440            log_err("need to add new script codes in cucdapi.c!\n");
441            return;
442        }
443        for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
444            const char* name = uscript_getName((UScriptCode)i);
445            if(name==NULL || strcmp(name,expectedLong[j])!=0){
446                log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
447            }
448            name = uscript_getShortName((UScriptCode)i);
449            if(name==NULL || strcmp(name,expectedShort[j])!=0){
450                log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
451            }
452        }
453        for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
454            UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
455            UErrorCode status = U_ZERO_ERROR;
456            int32_t len = 0;
457            len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
458            if(U_FAILURE(status)){
459                log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
460            }
461            if(len>1){
462                log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
463            }
464            if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
465                log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
466            }
467        }
468    }
469
470    {
471        /* test characters which have Script_Extensions */
472        UErrorCode errorCode=U_ZERO_ERROR;
473        if(!(
474                USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
475                USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
476                USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
477            U_FAILURE(errorCode)
478        ) {
479            log_err("uscript_getScript(character with Script_Extensions) failed\n");
480        }
481    }
482}
483
484void TestHasScript() {
485    if(!(
486        !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
487        uscript_hasScript(0x063f, USCRIPT_ARABIC) &&  /* main Script value */
488        !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
489        !uscript_hasScript(0x063f, USCRIPT_THAANA))
490    ) {
491        log_err("uscript_hasScript(U+063F, ...) is wrong\n");
492    }
493    if(!(
494        !uscript_hasScript(0x0640, USCRIPT_COMMON) &&  /* main Script value */
495        uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
496        uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
497        !uscript_hasScript(0x0640, USCRIPT_THAANA))
498    ) {
499        log_err("uscript_hasScript(U+0640, ...) is wrong\n");
500    }
501    if(!(
502        !uscript_hasScript(0x0650, USCRIPT_INHERITED) &&  /* main Script value */
503        uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
504        uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
505        !uscript_hasScript(0x0650, USCRIPT_THAANA))
506    ) {
507        log_err("uscript_hasScript(U+0650, ...) is wrong\n");
508    }
509    if(!(
510        !uscript_hasScript(0x0660, USCRIPT_COMMON) &&  /* main Script value */
511        uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
512        !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
513        uscript_hasScript(0x0660, USCRIPT_THAANA))
514    ) {
515        log_err("uscript_hasScript(U+0660, ...) is wrong\n");
516    }
517    if(!(
518        !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
519        uscript_hasScript(0xfdf2, USCRIPT_ARABIC) &&  /* main Script value */
520        !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
521        uscript_hasScript(0xfdf2, USCRIPT_THAANA))
522    ) {
523        log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
524    }
525    if(uscript_hasScript(0x0640, 0xaffe)) {
526        /* An unguarded implementation might go into an infinite loop. */
527        log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
528    }
529}
530
531static UBool scriptsContain(int32_t scripts[], int32_t length, int32_t script) {
532    UBool contain=FALSE;
533    int32_t prev=-1, i;
534    for(i=0; i<length; ++i) {
535        int32_t s=scripts[i];
536        if(s<=prev) {
537            log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
538        }
539        if(s==script) { contain=TRUE; }
540    }
541    return contain;
542}
543
544void TestGetScriptExtensions() {
545    UScriptCode scripts[20];
546    int32_t length;
547    UErrorCode errorCode;
548
549    /* errors and overflows */
550    errorCode=U_PARSE_ERROR;
551    length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
552    if(errorCode!=U_PARSE_ERROR) {
553        log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
554              u_errorName(errorCode));
555    }
556    errorCode=U_ZERO_ERROR;
557    length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
558    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
559        log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
560              u_errorName(errorCode));
561    }
562    errorCode=U_ZERO_ERROR;
563    length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
564    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
565        log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
566              u_errorName(errorCode));
567    }
568    errorCode=U_ZERO_ERROR;
569    length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
570    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
571        log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
572              (int)length, u_errorName(errorCode));
573    }
574    errorCode=U_ZERO_ERROR;
575    length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
576    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
577        log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
578              (int)length, u_errorName(errorCode));
579    }
580    /* U+063F has only a Script code, no Script_Extensions. */
581    errorCode=U_ZERO_ERROR;
582    length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
583    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
584        log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
585              (int)length, u_errorName(errorCode));
586    }
587
588    /* invalid code points */
589    errorCode=U_ZERO_ERROR;
590    length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
591    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
592        log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
593              (int)length, u_errorName(errorCode));
594    }
595    errorCode=U_ZERO_ERROR;
596    length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
597    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
598        log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
599              (int)length, u_errorName(errorCode));
600    }
601
602    /* normal usage */
603    errorCode=U_ZERO_ERROR;
604    length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
605    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
606        log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
607              (int)length, u_errorName(errorCode));
608    }
609    errorCode=U_ZERO_ERROR;
610    length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
611    if(U_FAILURE(errorCode) || length<3 ||
612            !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
613            !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
614            !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
615        log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
616              (int)length, u_errorName(errorCode));
617    }
618    errorCode=U_ZERO_ERROR;
619    length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
620    if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
621        log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
622              (int)length, u_errorName(errorCode));
623    }
624    errorCode=U_ZERO_ERROR;
625    length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
626    if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
627        log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
628              (int)length, u_errorName(errorCode));
629    }
630}
631
632void TestScriptMetadataAPI() {
633    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
634    UErrorCode errorCode=U_ZERO_ERROR;
635    UChar sample[8];
636
637    if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
638            U_FAILURE(errorCode) ||
639            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
640            sample[1]!=0) {
641        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
642    }
643    sample[0]=0xfffe;
644    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
645            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
646            sample[0]!=0xfffe) {
647        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
648    }
649    errorCode=U_ZERO_ERROR;
650    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
651            U_FAILURE(errorCode) ||
652            sample[0]!=0) {
653        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
654    }
655    sample[0]=0xfffe;
656    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
657            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
658            sample[0]!=0xfffe) {
659        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
660    }
661
662    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
663            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
664            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
665            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
666            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
667            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
668            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
669        log_err("uscript_getUsage() failed\n");
670    }
671
672    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
673            uscript_isRightToLeft(USCRIPT_CIRTH) ||
674            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
675            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
676        log_err("uscript_isRightToLeft() failed\n");
677    }
678
679    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
680            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
681            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
682            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
683        log_err("uscript_breaksBetweenLetters() failed\n");
684    }
685
686    if(uscript_isCased(USCRIPT_CIRTH) ||
687            uscript_isCased(USCRIPT_HAN) ||
688            !uscript_isCased(USCRIPT_LATIN) ||
689            !uscript_isCased(USCRIPT_GREEK)) {
690        log_err("uscript_isCased() failed\n");
691    }
692}
693
694void TestBinaryValues() {
695    /*
696     * Unicode 5.1 explicitly defines binary property value aliases.
697     * Verify that they are all recognized.
698     */
699    static const char *const falseValues[]={ "N", "No", "F", "False" };
700    static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
701    int32_t i;
702    for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
703        if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
704            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
705        }
706    }
707    for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
708        if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
709            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
710        }
711    }
712}
713