1/********************************************************************
2 * Copyright (c) 1997-2013, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************/
5
6#include <string.h>
7#include "unicode/utypes.h"
8#include "unicode/uscript.h"
9#include "unicode/uchar.h"
10#include "cintltst.h"
11#include "cucdapi.h"
12
/* Element count of a true array (not a pointer), as int32_t so it compares cleanly with signed indices. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
14
15void TestUScriptCodeAPI(){
16    int i =0;
17    int numErrors =0;
18    {
19        const char* testNames[]={
20        /* test locale */
21        "en", "en_US", "sr", "ta" , "te_IN",
22        "hi", "he", "ar",
23        /* test abbr */
24        "Hani", "Hang","Hebr","Hira",
25        "Knda","Kana","Khmr","Lao",
26        "Latn",/*"Latf","Latg",*/
27        "Mlym", "Mong",
28
29        /* test names */
30        "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
31        "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
32        /* test lower case names */
33        "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
34        "oriya",     "runic",     "sinhala", "syriac","tamil",
35        "telugu",    "thaana",    "thai",    "tibetan",
36        /* test the bounds*/
37        "tagb", "arabic",
38        /* test bogus */
39        "asfdasd", "5464", "12235",
40        /* test the last index */
41        "zyyy", "YI",
42        '\0'
43        };
44        UScriptCode expected[] ={
45            /* locales should return */
46            USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
47            USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
48            /* abbr should return */
49            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
50            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
51            USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
52            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
53            /* names should return */
54            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
55            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
56            /* lower case names should return */
57            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
58            USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
59            USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
60            /* bounds */
61            USCRIPT_TAGBANWA, USCRIPT_ARABIC,
62            /* bogus names should return invalid code */
63            USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
64            USCRIPT_COMMON, USCRIPT_YI,
65        };
66
67        UErrorCode err = U_ZERO_ERROR;
68
69        const int32_t capacity = 10;
70
71        for( ; testNames[i]!='\0'; i++){
72            UScriptCode script[10]={USCRIPT_INVALID_CODE};
73            uscript_getCode(testNames[i],script,capacity, &err);
74            if( script[0] != expected[i]){
75                   log_data_err("Error getting script code Got: %i  Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
76                       script[0],expected[i],testNames[i]);
77                   numErrors++;
78            }
79        }
80        if(numErrors >0 ){
81            log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
82        }
83    }
84
85    {
86        UErrorCode err = U_ZERO_ERROR;
87        int32_t capacity=0;
88        int32_t j;
89        UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
90        UScriptCode script[10]={USCRIPT_INVALID_CODE};
91        int32_t num = uscript_getCode("ja",script,capacity, &err);
92        /* preflight */
93        if(err==U_BUFFER_OVERFLOW_ERROR){
94            err = U_ZERO_ERROR;
95            capacity = 10;
96            num = uscript_getCode("ja",script,capacity, &err);
97            if(num!=(sizeof(jaCode)/sizeof(UScriptCode))){
98                log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
99                        num, (sizeof(jaCode)/sizeof(UScriptCode)));
100            }
101            for(j=0;j<sizeof(jaCode)/sizeof(UScriptCode);j++) {
102                if(script[j]!=jaCode[j]) {
103                    log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
104                            script[j], uscript_getName(script[j]),
105                            jaCode[j], uscript_getName(jaCode[j]));
106
107                }
108            }
109        }else{
110            log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
111                "U_BUFFER_OVERFLOW_ERROR",
112                 u_errorName(err));
113        }
114
115    }
116
117    {
118        UScriptCode testAbbr[]={
119            /* names should return */
120            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
121            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
122        };
123
124        const char* expectedNames[]={
125
126            /* test names */
127            "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
128            "Gothic",  "Greek",  "Gujarati",
129             '\0'
130        };
131        i=0;
132        while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
133            const char* name = uscript_getName(testAbbr[i]);
134             if(name == NULL) {
135               log_data_err("Couldn't get script name\n");
136               return;
137             }
138            numErrors=0;
139            if(strcmp(expectedNames[i],name)!=0){
140                log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
141                numErrors++;
142            }
143            if(numErrors > 0){
144                if(numErrors >0 ){
145                    log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
146                }
147            }
148            i++;
149        }
150
151    }
152
153    {
154        UScriptCode testAbbr[]={
155            /* abbr should return */
156            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
157            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
158            USCRIPT_LATIN,
159            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
160        };
161
162        const char* expectedAbbr[]={
163              /* test abbr */
164            "Hani", "Hang","Hebr","Hira",
165            "Knda","Kana","Khmr","Laoo",
166            "Latn",
167            "Mlym", "Mong",
168             '\0'
169        };
170        i=0;
171        while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
172            const char* name = uscript_getShortName(testAbbr[i]);
173            numErrors=0;
174            if(strcmp(expectedAbbr[i],name)!=0){
175                log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
176                numErrors++;
177            }
178            if(numErrors > 0){
179                if(numErrors >0 ){
180                    log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
181                }
182            }
183            i++;
184        }
185
186    }
187    /* now test uscript_getScript() API */
188    {
189        uint32_t codepoints[] = {
190                0x0000FF9D, /* USCRIPT_KATAKANA*/
191                0x0000FFBE, /* USCRIPT_HANGUL*/
192                0x0000FFC7, /* USCRIPT_HANGUL*/
193                0x0000FFCF, /* USCRIPT_HANGUL*/
194                0x0000FFD7, /* USCRIPT_HANGUL*/
195                0x0000FFDC, /* USCRIPT_HANGUL*/
196                0x00010300, /* USCRIPT_OLD_ITALIC*/
197                0x00010330, /* USCRIPT_GOTHIC*/
198                0x0001034A, /* USCRIPT_GOTHIC*/
199                0x00010400, /* USCRIPT_DESERET*/
200                0x00010428, /* USCRIPT_DESERET*/
201                0x0001D167, /* USCRIPT_INHERITED*/
202                0x0001D17B, /* USCRIPT_INHERITED*/
203                0x0001D185, /* USCRIPT_INHERITED*/
204                0x0001D1AA, /* USCRIPT_INHERITED*/
205                0x00020000, /* USCRIPT_HAN*/
206                0x00000D02, /* USCRIPT_MALAYALAM*/
207                0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
208                0x00000000, /* USCRIPT_COMMON*/
209                0x0001D169, /* USCRIPT_INHERITED*/
210                0x0001D182, /* USCRIPT_INHERITED*/
211                0x0001D18B, /* USCRIPT_INHERITED*/
212                0x0001D1AD, /* USCRIPT_INHERITED*/
213        };
214
215        UScriptCode expected[] = {
216                USCRIPT_KATAKANA ,
217                USCRIPT_HANGUL ,
218                USCRIPT_HANGUL ,
219                USCRIPT_HANGUL ,
220                USCRIPT_HANGUL ,
221                USCRIPT_HANGUL ,
222                USCRIPT_OLD_ITALIC,
223                USCRIPT_GOTHIC ,
224                USCRIPT_GOTHIC ,
225                USCRIPT_DESERET ,
226                USCRIPT_DESERET ,
227                USCRIPT_INHERITED,
228                USCRIPT_INHERITED,
229                USCRIPT_INHERITED,
230                USCRIPT_INHERITED,
231                USCRIPT_HAN ,
232                USCRIPT_MALAYALAM,
233                USCRIPT_UNKNOWN,
234                USCRIPT_COMMON,
235                USCRIPT_INHERITED ,
236                USCRIPT_INHERITED ,
237                USCRIPT_INHERITED ,
238                USCRIPT_INHERITED ,
239        };
240        UScriptCode code = USCRIPT_INVALID_CODE;
241        UErrorCode status = U_ZERO_ERROR;
242        UBool passed = TRUE;
243
244        for(i=0; i<LENGTHOF(codepoints); ++i){
245            code = uscript_getScript(codepoints[i],&status);
246            if(U_SUCCESS(status)){
247                if( code != expected[i] ||
248                    code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
249                ) {
250                    log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
251                    passed = FALSE;
252                }
253            }else{
254                log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
255                         codepoints[i],u_errorName(status));
256                break;
257            }
258        }
259
260        if(passed==FALSE){
261           log_err("uscript_getScript failed.\n");
262        }
263    }
264    {
265        UScriptCode code= USCRIPT_INVALID_CODE;
266        UErrorCode  status = U_ZERO_ERROR;
267        code = uscript_getScript(0x001D169,&status);
268        if(code != USCRIPT_INHERITED){
269            log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
270        }
271    }
272    {
273        UScriptCode code= USCRIPT_INVALID_CODE;
274        UErrorCode  status = U_ZERO_ERROR;
275        int32_t err = 0;
276
277        for(i = 0; i<=0x10ffff; i++){
278            code =  uscript_getScript(i,&status);
279            if(code == USCRIPT_INVALID_CODE){
280                err++;
281                log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
282            }
283        }
284        if(err>0){
285            log_err("uscript_getScript failed for %d codepoints\n", err);
286        }
287    }
288    {
289        for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
290            const char* name = uscript_getName((UScriptCode)i);
291            if(name==NULL || strcmp(name,"")==0){
292                log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
293            }
294        }
295    }
296
297    {
298        /*
299         * These script codes were originally added to ICU pre-3.6, so that ICU would
300         * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
301         * These script codes were added with only short names because we don't
302         * want to invent long names ourselves.
303         * Unicode 5 and later encode some of these scripts and give them long names.
304         * Whenever this happens, the long script names here need to be updated.
305         */
306        static const char* expectedLong[] = {
307            "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
308            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
309            "Lepcha", "Lina", "Mandaic", "Maya", "Meroitic_Hieroglyphs", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
310            "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
311            "Zxxx", "Unknown",
312            "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
313            "Moon", "Meetei_Mayek",
314            /* new in ICU 4.0 */
315            "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
316            "Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
317            "Zmth", "Zsym",
318            /* new in ICU 4.4 */
319            "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
320            /* new in ICU 4.6 */
321            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Meroitic_Cursive",
322            "Narb", "Nbat", "Palm", "Sind", "Wara",
323            /* new in ICU 4.8 */
324            "Afak", "Jurc", "Mroo", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
325            /* new in ICU 49 */
326            "Hluw", "Khoj", "Tirh",
327            /* new in ICU 52 */
328            "Aghb", "Mahj"
329        };
330        static const char* expectedShort[] = {
331            "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
332            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
333            "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
334            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
335            "Zxxx", "Zzzz",
336            "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
337            "Moon", "Mtei",
338            /* new in ICU 4.0 */
339            "Armi", "Avst", "Cakm", "Kore",
340            "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
341            "Zmth", "Zsym",
342            /* new in ICU 4.4 */
343            "Bamu", "Lisu", "Nkgb", "Sarb",
344            /* new in ICU 4.6 */
345            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
346            "Narb", "Nbat", "Palm", "Sind", "Wara",
347            /* new in ICU 4.8 */
348            "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
349            /* new in ICU 49 */
350            "Hluw", "Khoj", "Tirh",
351            /* new in ICU 52 */
352            "Aghb", "Mahj"
353        };
354        int32_t j = 0;
355        if(LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
356            log_err("need to add new script codes in cucdapi.c!\n");
357            return;
358        }
359        for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
360            const char* name = uscript_getName((UScriptCode)i);
361            if(name==NULL || strcmp(name,expectedLong[j])!=0){
362                log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
363            }
364            name = uscript_getShortName((UScriptCode)i);
365            if(name==NULL || strcmp(name,expectedShort[j])!=0){
366                log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
367            }
368        }
369        for(i=0; i<LENGTHOF(expectedLong); i++){
370            UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
371            UErrorCode status = U_ZERO_ERROR;
372            int32_t len = 0;
373            len = uscript_getCode(expectedShort[i], fillIn, LENGTHOF(fillIn), &status);
374            if(U_FAILURE(status)){
375                log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
376            }
377            if(len>1){
378                log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
379            }
380            if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
381                log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
382            }
383        }
384    }
385
386    {
387        /* test characters which have Script_Extensions */
388        UErrorCode errorCode=U_ZERO_ERROR;
389        if(!(
390                USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
391                USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
392                USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
393            U_FAILURE(errorCode)
394        ) {
395            log_err("uscript_getScript(character with Script_Extensions) failed\n");
396        }
397    }
398}
399
400void TestHasScript() {
401    if(!(
402        !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
403        uscript_hasScript(0x063f, USCRIPT_ARABIC) &&  /* main Script value */
404        !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
405        !uscript_hasScript(0x063f, USCRIPT_THAANA))
406    ) {
407        log_err("uscript_hasScript(U+063F, ...) is wrong\n");
408    }
409    if(!(
410        !uscript_hasScript(0x0640, USCRIPT_COMMON) &&  /* main Script value */
411        uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
412        uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
413        !uscript_hasScript(0x0640, USCRIPT_THAANA))
414    ) {
415        log_err("uscript_hasScript(U+0640, ...) is wrong\n");
416    }
417    if(!(
418        !uscript_hasScript(0x0650, USCRIPT_INHERITED) &&  /* main Script value */
419        uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
420        uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
421        !uscript_hasScript(0x0650, USCRIPT_THAANA))
422    ) {
423        log_err("uscript_hasScript(U+0650, ...) is wrong\n");
424    }
425    if(!(
426        !uscript_hasScript(0x0660, USCRIPT_COMMON) &&  /* main Script value */
427        uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
428        !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
429        uscript_hasScript(0x0660, USCRIPT_THAANA))
430    ) {
431        log_err("uscript_hasScript(U+0660, ...) is wrong\n");
432    }
433    if(!(
434        !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
435        uscript_hasScript(0xfdf2, USCRIPT_ARABIC) &&  /* main Script value */
436        !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
437        uscript_hasScript(0xfdf2, USCRIPT_THAANA))
438    ) {
439        log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
440    }
441    if(uscript_hasScript(0x0640, 0xaffe)) {
442        /* An unguarded implementation might go into an infinite loop. */
443        log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
444    }
445}
446
447void TestGetScriptExtensions() {
448    UScriptCode scripts[20];
449    int32_t length;
450    UErrorCode errorCode;
451
452    /* errors and overflows */
453    errorCode=U_PARSE_ERROR;
454    length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
455    if(errorCode!=U_PARSE_ERROR) {
456        log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
457              u_errorName(errorCode));
458    }
459    errorCode=U_ZERO_ERROR;
460    length=uscript_getScriptExtensions(0x0640, NULL, LENGTHOF(scripts), &errorCode);
461    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
462        log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
463              u_errorName(errorCode));
464    }
465    errorCode=U_ZERO_ERROR;
466    length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
467    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
468        log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
469              u_errorName(errorCode));
470    }
471    errorCode=U_ZERO_ERROR;
472    length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
473    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
474        log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d != 3 - %s\n",
475              (int)length, u_errorName(errorCode));
476    }
477    errorCode=U_ZERO_ERROR;
478    length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
479    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
480        log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d != 3 - %s\n",
481              (int)length, u_errorName(errorCode));
482    }
483    /* U+063F has only a Script code, no Script_Extensions. */
484    errorCode=U_ZERO_ERROR;
485    length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
486    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
487        log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
488              (int)length, u_errorName(errorCode));
489    }
490
491    /* invalid code points */
492    errorCode=U_ZERO_ERROR;
493    length=uscript_getScriptExtensions(-1, scripts, LENGTHOF(scripts), &errorCode);
494    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
495        log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
496              (int)length, u_errorName(errorCode));
497    }
498    errorCode=U_ZERO_ERROR;
499    length=uscript_getScriptExtensions(0x110000, scripts, LENGTHOF(scripts), &errorCode);
500    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
501        log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
502              (int)length, u_errorName(errorCode));
503    }
504
505    /* normal usage */
506    errorCode=U_ZERO_ERROR;
507    length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
508    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
509        log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
510              (int)length, u_errorName(errorCode));
511    }
512    errorCode=U_ZERO_ERROR;
513    length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
514    if(U_FAILURE(errorCode) || length!=3 ||
515       scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_SYRIAC || scripts[2]!=USCRIPT_MANDAIC
516    ) {
517        log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
518              (int)length, u_errorName(errorCode));
519    }
520    errorCode=U_ZERO_ERROR;
521    length=uscript_getScriptExtensions(0xfdf2, scripts, LENGTHOF(scripts), &errorCode);
522    if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
523        log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
524              (int)length, u_errorName(errorCode));
525    }
526    errorCode=U_ZERO_ERROR;
527    length=uscript_getScriptExtensions(0xff65, scripts, LENGTHOF(scripts), &errorCode);
528    if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
529        log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
530              (int)length, u_errorName(errorCode));
531    }
532}
533
534void TestScriptMetadataAPI() {
535    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
536    UErrorCode errorCode=U_ZERO_ERROR;
537    UChar sample[8];
538
539    if(uscript_getSampleString(USCRIPT_LATIN, sample, LENGTHOF(sample), &errorCode)!=1 ||
540            U_FAILURE(errorCode) ||
541            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
542            sample[1]!=0) {
543        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
544    }
545    sample[0]=0xfffe;
546    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
547            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
548            sample[0]!=0xfffe) {
549        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
550    }
551    errorCode=U_ZERO_ERROR;
552    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, LENGTHOF(sample), &errorCode)!=0 ||
553            U_FAILURE(errorCode) ||
554            sample[0]!=0) {
555        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
556    }
557    sample[0]=0xfffe;
558    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
559            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
560            sample[0]!=0xfffe) {
561        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
562    }
563
564    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
565            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
566            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
567            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
568            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
569            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
570            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
571        log_err("uscript_getUsage() failed\n");
572    }
573
574    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
575            uscript_isRightToLeft(USCRIPT_CIRTH) ||
576            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
577            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
578        log_err("uscript_isRightToLeft() failed\n");
579    }
580
581    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
582            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
583            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
584            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
585        log_err("uscript_breaksBetweenLetters() failed\n");
586    }
587
588    if(uscript_isCased(USCRIPT_CIRTH) ||
589            uscript_isCased(USCRIPT_HAN) ||
590            !uscript_isCased(USCRIPT_LATIN) ||
591            !uscript_isCased(USCRIPT_GREEK)) {
592        log_err("uscript_isCased() failed\n");
593    }
594}
595
596void TestBinaryValues() {
597    /*
598     * Unicode 5.1 explicitly defines binary property value aliases.
599     * Verify that they are all recognized.
600     */
601    static const char *const falseValues[]={ "N", "No", "F", "False" };
602    static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
603    int32_t i;
604    for(i=0; i<LENGTHOF(falseValues); ++i) {
605        if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
606            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
607        }
608    }
609    for(i=0; i<LENGTHOF(trueValues); ++i) {
610        if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
611            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
612        }
613    }
614}
615