1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CUCDTST.C
9*
10* Modification History:
11*        Name                     Description
12*     Madhu Katragadda            Ported for C API, added tests for string functions
13********************************************************************************
14*/
15
16#include <string.h>
17#include <math.h>
18#include <stdlib.h>
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/putil.h"
23#include "unicode/ustring.h"
24#include "unicode/uloc.h"
25#include "unicode/unorm2.h"
26
27#include "cintltst.h"
28#include "putilimp.h"
29#include "uparse.h"
30#include "ucase.h"
31#include "ubidi_props.h"
32#include "uprops.h"
33#include "uset_imp.h"
34#include "usc_impl.h"
35#include "udatamem.h" /* for testing ucase_openBinary() */
36#include "cucdapi.h"
37#include "cmemory.h"
38
/* prototypes --------------------------------------------------------------- */

/*
 * Forward declarations of the static test functions of this file.
 * addUnicodeTest() takes the address of each of them, so they have to be
 * declared before that function.
 *
 * NOTE(review): addUnicodeTest() also registers TestBinaryValues,
 * TestUScriptCodeAPI, TestHasScript, TestGetScriptExtensions and
 * TestScriptMetadataAPI, which are not declared here - presumably they are
 * declared in "cucdapi.h"; confirm.
 */
static void TestUpperLower(void);
static void TestLetterNumber(void);
static void TestMisc(void);
static void TestPOSIX(void);
static void TestControlPrint(void);
static void TestIdentifier(void);
static void TestUnicodeData(void);
static void TestCodeUnit(void);
static void TestCodePoint(void);
static void TestCharLength(void);
static void TestCharNames(void);
static void TestUCharFromNameUnderflow(void);
static void TestMirroring(void);
static void TestUScriptRunAPI(void);
static void TestAdditionalProperties(void);
static void TestNumericProperties(void);
static void TestPropertyNames(void);
static void TestPropertyValues(void);
static void TestConsistency(void);
static void TestUCase(void);
static void TestUBiDiProps(void);
static void TestCaseFolding(void);

/*
 * internal methods used
 * NOTE(review): judging by the names, these presumably translate the
 * textual tags of the test data tables below into numeric values - confirm
 * against their definitions.
 */
static int32_t MakeProp(char* str);
static int32_t MakeDir(char* str);
67
68/* helpers ------------------------------------------------------------------ */
69
70static void
71parseUCDFile(const char *filename,
72             char *fields[][2], int32_t fieldCount,
73             UParseLineFn *lineFn, void *context,
74             UErrorCode *pErrorCode) {
75    char path[256];
76    char backupPath[256];
77
78    if(U_FAILURE(*pErrorCode)) {
79        return;
80    }
81
82    /* Look inside ICU_DATA first */
83    strcpy(path, u_getDataDirectory());
84    strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
85    strcat(path, filename);
86
87    /* As a fallback, try to guess where the source data was located
88     *    at the time ICU was built, and look there.
89     */
90    strcpy(backupPath, ctest_dataSrcDir());
91    strcat(backupPath, U_FILE_SEP_STRING);
92    strcat(backupPath, "unidata" U_FILE_SEP_STRING);
93    strcat(backupPath, filename);
94
95    u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
96    if(*pErrorCode==U_FILE_ACCESS_ERROR) {
97        *pErrorCode=U_ZERO_ERROR;
98        u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
99    }
100    if(U_FAILURE(*pErrorCode)) {
101        log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
102    }
103}
104
105/* test data ---------------------------------------------------------------- */
106
/*
 * Two-letter general category abbreviations, concatenated without any
 * separator. The abbreviations appear in the same order as the entries of
 * tagValues[] below, so the two tables look like a parallel
 * abbreviation -> value mapping.
 * NOTE(review): the code that consumes these tables is outside this view
 * (presumably MakeProp()); confirm before reordering either table.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
/* General category constants; each entry is annotated with its abbreviation. */
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
141
/*
 * Short names of bidirectional classes. Every name has at most three
 * characters and is stored NUL-terminated in a fixed 5-byte slot.
 * NOTE(review): the index of a name presumably equals the numeric value of
 * the corresponding bidi class constant, and the table is presumably
 * searched by MakeDir(); both are outside this view - confirm.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI",
    "LRI",
    "RLI",
    "PDI"
};
168
169void addUnicodeTest(TestNode** root);
170
171void addUnicodeTest(TestNode** root)
172{
173    addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
174    addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
175    addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
176    addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
177    addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
178    addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
179    addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
180    addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
181    addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
182    addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
183    addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
184    addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
185    addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
186    addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
187    addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow");
188    addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
189    addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
190    addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
191    addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
192    addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
193    addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
194    addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
195    addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
196    addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
197    addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
198    addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
199    addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
200}
201
202/*==================================================== */
203/* test u_toupper() and u_tolower()                    */
204/*==================================================== */
205static void TestUpperLower()
206{
207    const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
208    const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
209    U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
210    U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
211    int32_t i;
212
213    U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
214    U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
215
216/*
217Checks LetterLike Symbols which were previously a source of confusion
218[Bertrand A. D. 02/04/98]
219*/
220    for (i=0x2100;i<0x2138;i++)
221    {
222        /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
223        if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
224        {
225            if (i != (int)u_tolower(i)) /* itself */
226                log_err("Failed case conversion with itself: U+%04x\n", i);
227            if (i != (int)u_toupper(i))
228                log_err("Failed case conversion with itself: U+%04x\n", i);
229        }
230    }
231
232    for(i=0; i < u_strlen(upper); i++){
233        if(u_tolower(upper[i]) != lower[i]){
234            log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
235        }
236    }
237
238    log_verbose("testing upper lower\n");
239    for (i = 0; i < 21; i++) {
240
241        if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
242        {
243            log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
244        }
245        else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
246         {
247            log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
248        }
249        else if (upperTest[i] != u_tolower(lowerTest[i]))
250        {
251            log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
252        }
253        else if (lowerTest[i] != u_toupper(upperTest[i]))
254         {
255            log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
256        }
257        else if (upperTest[i] != u_tolower(upperTest[i]))
258        {
259            log_err("Failed case conversion with itself: %c\n", upperTest[i]);
260        }
261        else if (lowerTest[i] != u_toupper(lowerTest[i]))
262        {
263            log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
264        }
265    }
266    log_verbose("done testing upper lower\n");
267
268    log_verbose("testing u_istitle\n");
269    {
270        static const UChar expected[] = {
271            0x1F88,
272            0x1F89,
273            0x1F8A,
274            0x1F8B,
275            0x1F8C,
276            0x1F8D,
277            0x1F8E,
278            0x1F8F,
279            0x1F88,
280            0x1F89,
281            0x1F8A,
282            0x1F8B,
283            0x1F8C,
284            0x1F8D,
285            0x1F8E,
286            0x1F8F,
287            0x1F98,
288            0x1F99,
289            0x1F9A,
290            0x1F9B,
291            0x1F9C,
292            0x1F9D,
293            0x1F9E,
294            0x1F9F,
295            0x1F98,
296            0x1F99,
297            0x1F9A,
298            0x1F9B,
299            0x1F9C,
300            0x1F9D,
301            0x1F9E,
302            0x1F9F,
303            0x1FA8,
304            0x1FA9,
305            0x1FAA,
306            0x1FAB,
307            0x1FAC,
308            0x1FAD,
309            0x1FAE,
310            0x1FAF,
311            0x1FA8,
312            0x1FA9,
313            0x1FAA,
314            0x1FAB,
315            0x1FAC,
316            0x1FAD,
317            0x1FAE,
318            0x1FAF,
319            0x1FBC,
320            0x1FBC,
321            0x1FCC,
322            0x1FCC,
323            0x1FFC,
324            0x1FFC,
325        };
326        int32_t num = sizeof(expected)/sizeof(expected[0]);
327        for(i=0; i<num; i++){
328            if(!u_istitle(expected[i])){
329                log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
330            }
331        }
332
333    }
334}
335
336/* compare two sets and verify that their difference or intersection is empty */
337static UBool
338showADiffB(const USet *a, const USet *b,
339           const char *a_name, const char *b_name,
340           UBool expect, UBool diffIsError) {
341    USet *aa;
342    int32_t i, start, end, length;
343    UErrorCode errorCode;
344
345    /*
346     * expect:
347     * TRUE  -> a-b should be empty, that is, b should contain all of a
348     * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
349     */
350    if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
351        return TRUE;
352    }
353
354    /* clone a to aa because a is const */
355    aa=uset_open(1, 0);
356    if(aa==NULL) {
357        /* unusual problem - out of memory? */
358        return FALSE;
359    }
360    uset_addAll(aa, a);
361
362    /* compute the set in question */
363    if(expect) {
364        /* a-b */
365        uset_removeAll(aa, b);
366    } else {
367        /* a&b */
368        uset_retainAll(aa, b);
369    }
370
371    /* aa is not empty because of the initial tests above; show its contents */
372    errorCode=U_ZERO_ERROR;
373    i=0;
374    for(;;) {
375        length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
376        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
377            break; /* done */
378        }
379        if(U_FAILURE(errorCode)) {
380            log_err("error comparing %s with %s at difference item %d: %s\n",
381                a_name, b_name, i, u_errorName(errorCode));
382            break;
383        }
384        if(length!=0) {
385            break; /* done with code points, got a string or -1 */
386        }
387
388        if(diffIsError) {
389            if(expect) {
390                log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
391            } else {
392                log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
393            }
394        } else {
395            if(expect) {
396                log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
397            } else {
398                log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
399            }
400        }
401
402        ++i;
403    }
404
405    uset_close(aa);
406    return FALSE;
407}
408
409static UBool
410showAMinusB(const USet *a, const USet *b,
411            const char *a_name, const char *b_name,
412            UBool diffIsError) {
413    return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
414}
415
416static UBool
417showAIntersectB(const USet *a, const USet *b,
418                const char *a_name, const char *b_name,
419                UBool diffIsError) {
420    return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
421}
422
423static UBool
424compareUSets(const USet *a, const USet *b,
425             const char *a_name, const char *b_name,
426             UBool diffIsError) {
427    /*
428     * Use an arithmetic & not a logical && so that both branches
429     * are always taken and all differences are shown.
430     */
431    return
432        showAMinusB(a, b, a_name, b_name, diffIsError) &
433        showAMinusB(b, a, b_name, a_name, diffIsError);
434}
435
436/* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
437static void TestLetterNumber()
438{
439    UChar i = 0x0000;
440
441    log_verbose("Testing for isalpha\n");
442    for (i = 0x0041; i < 0x005B; i++) {
443        if (!u_isalpha(i))
444        {
445            log_err("Failed isLetter test at  %.4X\n", i);
446        }
447    }
448    for (i = 0x0660; i < 0x066A; i++) {
449        if (u_isalpha(i))
450        {
451            log_err("Failed isLetter test with numbers at %.4X\n", i);
452        }
453    }
454
455    log_verbose("Testing for isdigit\n");
456    for (i = 0x0660; i < 0x066A; i++) {
457        if (!u_isdigit(i))
458        {
459            log_verbose("Failed isNumber test at %.4X\n", i);
460        }
461    }
462
463    log_verbose("Testing for isalnum\n");
464    for (i = 0x0041; i < 0x005B; i++) {
465        if (!u_isalnum(i))
466        {
467            log_err("Failed isAlNum test at  %.4X\n", i);
468        }
469    }
470    for (i = 0x0660; i < 0x066A; i++) {
471        if (!u_isalnum(i))
472        {
473            log_err("Failed isAlNum test at  %.4X\n", i);
474        }
475    }
476
477    {
478        /*
479         * The following checks work only starting from Unicode 4.0.
480         * Check the version number here.
481         */
482        static UVersionInfo u401={ 4, 0, 1, 0 };
483        UVersionInfo version;
484        u_getUnicodeVersion(version);
485        if(version[0]<4 || 0==memcmp(version, u401, 4)) {
486            return;
487        }
488    }
489
490    {
491        /*
492         * Sanity check:
493         * Verify that exactly the digit characters have decimal digit values.
494         * This assumption is used in the implementation of u_digit()
495         * (which checks nt=de)
496         * compared with the parallel java.lang.Character.digit()
497         * (which checks Nd).
498         *
499         * This was not true in Unicode 3.2 and earlier.
500         * Unicode 4.0 fixed discrepancies.
501         * Unicode 4.0.1 re-introduced problems in this area due to an
502         * unintentionally incomplete last-minute change.
503         */
504        U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
505        U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
506
507        USet *digits, *decimalValues;
508        UErrorCode errorCode;
509
510        U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
511        U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
512        errorCode=U_ZERO_ERROR;
513        digits=uset_openPattern(digitsPattern, 6, &errorCode);
514        decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
515
516        if(U_SUCCESS(errorCode)) {
517            compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
518        }
519
520        uset_close(digits);
521        uset_close(decimalValues);
522    }
523}
524
525static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
526                                const UChar32 *sampleChars, int32_t sampleCharsLength,
527                                UBool expected) {
528    int32_t i;
529    for (i = 0; i < sampleCharsLength; ++i) {
530        UBool result = propFn(sampleChars[i]);
531        if (result != expected) {
532            log_err("error: character property function %s(U+%04x)=%d is wrong\n",
533                    propName, sampleChars[i], result);
534        }
535    }
536}
537
538/* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
539static void TestMisc()
540{
541    static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
542    static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
543    static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
544    static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
545    static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
546    static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
547/*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
548    static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
549    static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
550    static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
551    static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
552
553    static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
554
555    uint32_t mask;
556
557    int32_t i;
558    char icuVersion[U_MAX_VERSION_STRING_LENGTH];
559    UVersionInfo realVersion;
560
561    memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
562
563    testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
564    testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
565
566    testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
567                        sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE);
568    testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
569                        sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE);
570
571    testSampleCharProps(u_isWhitespace, "u_isWhitespace",
572                        sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRUE);
573    testSampleCharProps(u_isWhitespace, "u_isWhitespace",
574                        sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), FALSE);
575
576    testSampleCharProps(u_isdefined, "u_isdefined",
577                        sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE);
578    testSampleCharProps(u_isdefined, "u_isdefined",
579                        sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE);
580
581    testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), TRUE);
582    testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), FALSE);
583
584    testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), TRUE);
585    testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), FALSE);
586
587    for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {
588        if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
589            log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
590                    sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
591        }
592    }
593
594    /* Tests the ICU version #*/
595    u_getVersion(realVersion);
596    u_versionToString(realVersion, icuVersion);
597    if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
598    {
599        log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
600    }
601#if defined(ICU_VERSION)
602    /* test only happens where we have configure.in with VERSION - sanity check. */
603    if(strcmp(U_ICU_VERSION, ICU_VERSION))
604    {
605        log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
606    }
607#endif
608
609    /* test U_GC_... */
610    if(
611        U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
612        U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
613        U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
614        U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
615        U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
616        U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
617    ) {
618        log_err("error: U_GET_GC_MASK does not work properly\n");
619    }
620
621    mask=0;
622    mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
623
624    mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
625    mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
626    mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
627    mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
628    mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
629
630    mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
631    mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
632    mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
633
634    mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
635    mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
636    mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
637
638    mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
639    mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
640    mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
641
642    mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
643    mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
644    mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
645    mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
646
647    mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
648    mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
649    mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
650    mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
651    mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
652
653    mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
654    mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
655    mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
656    mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
657
658    mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
659    mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
660
661    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
662        log_err("error: problems with U_GC_XX_MASK constants\n");
663    }
664
665    mask=0;
666    mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
667    mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
668    mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
669    mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
670    mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
671    mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
672    mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
673
674    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
675        log_err("error: problems with U_GC_Y_MASK constants\n");
676    }
677    {
678        static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
679        for(i=0; i<10; i++){
680            if(digit[i]!=u_forDigit(i,10)){
681                log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
682            }
683        }
684    }
685
686    /* test u_digit() */
687    {
688        static const struct {
689            UChar32 c;
690            int8_t radix, value;
691        } data[]={
692            /* base 16 */
693            { 0x0031, 16, 1 },
694            { 0x0038, 16, 8 },
695            { 0x0043, 16, 12 },
696            { 0x0066, 16, 15 },
697            { 0x00e4, 16, -1 },
698            { 0x0662, 16, 2 },
699            { 0x06f5, 16, 5 },
700            { 0xff13, 16, 3 },
701            { 0xff41, 16, 10 },
702
703            /* base 8 */
704            { 0x0031, 8, 1 },
705            { 0x0038, 8, -1 },
706            { 0x0043, 8, -1 },
707            { 0x0066, 8, -1 },
708            { 0x00e4, 8, -1 },
709            { 0x0662, 8, 2 },
710            { 0x06f5, 8, 5 },
711            { 0xff13, 8, 3 },
712            { 0xff41, 8, -1 },
713
714            /* base 36 */
715            { 0x5a, 36, 35 },
716            { 0x7a, 36, 35 },
717            { 0xff3a, 36, 35 },
718            { 0xff5a, 36, 35 },
719
720            /* wrong radix values */
721            { 0x0031, 1, -1 },
722            { 0xff3a, 37, -1 }
723        };
724
725        for(i=0; i<UPRV_LENGTHOF(data); ++i) {
726            if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
727                log_err("u_digit(U+%04x, %d)=%d expected %d\n",
728                        data[i].c,
729                        data[i].radix,
730                        u_digit(data[i].c, data[i].radix),
731                        data[i].value);
732            }
733        }
734    }
735}
736
737/* test C/POSIX-style functions --------------------------------------------- */
738
/*
 * bit flags
 *
 * One flag per C/POSIX-style class function; they are combined into
 * posixData[].posixResults. TestPOSIX() starts with the mask 1 and shifts it
 * left once per entry of posixClasses[], so the flag with value 1<<n
 * belongs to posixClasses[n] and both lists must stay in the same order.
 */
#define ISAL     1      /* u_isalpha */
#define ISLO     2      /* u_islower */
#define ISUP     4      /* u_isupper */

#define ISDI     8      /* u_isdigit */
#define ISXD  0x10      /* u_isxdigit */

#define ISAN  0x20      /* u_isalnum */

#define ISPU  0x40      /* u_ispunct */
#define ISGR  0x80      /* u_isgraph */
#define ISPR 0x100      /* u_isprint */

#define ISSP 0x200      /* u_isspace */
#define ISBL 0x400      /* u_isblank */
#define ISCN 0x800      /* u_iscntrl */
756
/* C/POSIX-style functions, in the same order as the bit flags */
/* Signature shared by all class functions stored in posixClasses[]. */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/*
 * The class functions under test, each with the name that TestPOSIX() prints
 * after "u_" in its error message. The entry at index n is checked against
 * the flag bit 1<<n of posixData[].posixResults.
 */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
777
/*
 * Expected results for TestPOSIX(): for each code point c, posixResults is
 * the OR of the IS.. flags of exactly those class functions that must return
 * TRUE for c; every class whose flag is absent must return FALSE.
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
817
818static void
819TestPOSIX() {
820    uint32_t mask;
821    int32_t cl, i;
822    UBool expect;
823
824    mask=1;
825    for(cl=0; cl<12; ++cl) {
826        for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {
827            expect=(UBool)((posixData[i].posixResults&mask)!=0);
828            if(posixClasses[cl].fn(posixData[i].c)!=expect) {
829                log_err("u_%s(U+%04x)=%s is wrong\n",
830                    posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
831            }
832        }
833        mask<<=1;
834    }
835}
836
837/* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
838static void TestControlPrint()
839{
840    const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
841    const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
842    const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
843    const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
844    UChar32 c;
845
846    testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), TRUE);
847    testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), FALSE);
848
849    testSampleCharProps(u_isprint, "u_isprint",
850                        samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE);
851    testSampleCharProps(u_isprint, "u_isprint",
852                        sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), FALSE);
853
854    /* test all ISO 8 controls */
855    for(c=0; c<=0x9f; ++c) {
856        if(c==0x20) {
857            /* skip ASCII graphic characters and continue with DEL */
858            c=0x7f;
859        }
860        if(!u_iscntrl(c)) {
861            log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
862        }
863        if(!u_isISOControl(c)) {
864            log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
865        }
866        if(u_isprint(c)) {
867            log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
868        }
869    }
870
871    /* test all Latin-1 graphic characters */
872    for(c=0x20; c<=0xff; ++c) {
873        if(c==0x7f) {
874            c=0xa0;
875        } else if(c==0xad) {
876            /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
877            ++c;
878        }
879        if(!u_isprint(c)) {
880            log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
881        }
882    }
883}
884
885/* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
886static void TestIdentifier()
887{
888    const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
889    const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
890    const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
891    const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
892    const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
893    const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
894    const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
895    const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
896    const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
897    const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
898
899    testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
900                        sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
901    testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
902                        sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), FALSE);
903
904    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
905                        sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE);
906    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
907                        sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE);
908
909    /* IDPart should imply IDStart */
910    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
911                        sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE);
912
913    testSampleCharProps(u_isIDStart, "u_isIDStart",
914                        sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
915    testSampleCharProps(u_isIDStart, "u_isIDStart",
916                        sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), FALSE);
917
918    testSampleCharProps(u_isIDPart, "u_isIDPart",
919                        sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE);
920    testSampleCharProps(u_isIDPart, "u_isIDPart",
921                        sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), FALSE);
922
923    /* IDPart should imply IDStart */
924    testSampleCharProps(u_isIDPart, "u_isIDPart",
925                        sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE);
926
927    testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
928                        sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE);
929    testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
930                        sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FALSE);
931}
932
933/* for each line of UnicodeData.txt, check some of the properties */
/*
 * Context object that TestUnicodeData() hands to parseUCDFile() and that
 * unicodeDataLineFn() receives back through its void *context parameter.
 */
typedef struct UnicodeDataContext {
#if UCONFIG_NO_NORMALIZATION
    /* placeholder member so that the struct is not empty when normalization is compiled out */
    const void *dummy;
#else
    /* set by TestUnicodeData() from unorm2_getNFCInstance() */
    const UNormalizer2 *nfc;
    /* set by TestUnicodeData() from unorm2_getNFKCInstance() */
    const UNormalizer2 *nfkc;
#endif
} UnicodeDataContext;
942
943/*
944 * ### TODO
945 * This test fails incorrectly if the First or Last code point of a repetitive area
946 * is overridden, which is allowed and is encouraged for the PUAs.
947 * Currently, this means that both area First/Last and override lines are
948 * tested against the properties from the API,
949 * and the area boundary will not match and cause an error.
950 *
951 * This function should detect area boundaries and skip them for the test of individual
952 * code points' properties.
953 * Then it should check that the areas contain all the same properties except where overridden.
954 * For this, it would have had to set a flag for which code points were listed explicitly.
955 */
956static void U_CALLCONV
957unicodeDataLineFn(void *context,
958                  char *fields[][2], int32_t fieldCount,
959                  UErrorCode *pErrorCode)
960{
961    char buffer[100];
962    const char *d;
963    char *end;
964    uint32_t value;
965    UChar32 c;
966    int32_t i;
967    int8_t type;
968    int32_t dt;
969    UChar dm[32], s[32];
970    int32_t dmLength, length;
971
972#if !UCONFIG_NO_NORMALIZATION
973    const UNormalizer2 *nfc, *nfkc;
974#endif
975
976    /* get the character code, field 0 */
977    c=strtoul(fields[0][0], &end, 16);
978    if(end<=fields[0][0] || end!=fields[0][1]) {
979        log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
980        return;
981    }
982    if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
983        log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
984        return;
985    }
986
987    /* get general category, field 2 */
988    *fields[2][1]=0;
989    type = (int8_t)tagValues[MakeProp(fields[2][0])];
990    if(u_charType(c)!=type) {
991        log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
992    }
993    if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
994        log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
995    }
996
997    /* get canonical combining class, field 3 */
998    value=strtoul(fields[3][0], &end, 10);
999    if(end<=fields[3][0] || end!=fields[3][1]) {
1000        log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1001        return;
1002    }
1003    if(value>255) {
1004        log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1005        return;
1006    }
1007#if !UCONFIG_NO_NORMALIZATION
1008    if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1009        log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1010    }
1011    nfkc=((UnicodeDataContext *)context)->nfkc;
1012    if(value!=unorm2_getCombiningClass(nfkc, c)) {
1013        log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1014    }
1015#endif
1016
1017    /* get BiDi category, field 4 */
1018    *fields[4][1]=0;
1019    i=MakeDir(fields[4][0]);
1020    if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1021        log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1022    }
1023
1024    /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1025    d=NULL;
1026    if(fields[5][0]==fields[5][1]) {
1027        /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1028        if(c==0xac00 || c==0xd7a3) {
1029            dt=U_DT_CANONICAL;
1030        } else {
1031            dt=U_DT_NONE;
1032        }
1033    } else {
1034        d=fields[5][0];
1035        *fields[5][1]=0;
1036        dt=UCHAR_INVALID_CODE;
1037        if(*d=='<') {
1038            end=strchr(++d, '>');
1039            if(end!=NULL) {
1040                *end=0;
1041                dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1042                d=u_skipWhitespace(end+1);
1043            }
1044        } else {
1045            dt=U_DT_CANONICAL;
1046        }
1047    }
1048    if(dt>U_DT_NONE) {
1049        if(c==0xac00) {
1050            dm[0]=0x1100;
1051            dm[1]=0x1161;
1052            dm[2]=0;
1053            dmLength=2;
1054        } else if(c==0xd7a3) {
1055            dm[0]=0xd788;
1056            dm[1]=0x11c2;
1057            dm[2]=0;
1058            dmLength=2;
1059        } else {
1060            dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1061        }
1062    } else {
1063        dmLength=-1;
1064    }
1065    if(dt<0 || U_FAILURE(*pErrorCode)) {
1066        log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1067        return;
1068    }
1069#if !UCONFIG_NO_NORMALIZATION
1070    i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1071    if(i!=dt) {
1072        log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1073    }
1074    /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1075    length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1076    if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1077        log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1078                "or the Decomposition_Mapping is different (%s)\n",
1079                c, length, dmLength, u_errorName(*pErrorCode));
1080        return;
1081    }
1082    /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1083    if(dt!=U_DT_CANONICAL) {
1084        dmLength=-1;
1085    }
1086    nfc=((UnicodeDataContext *)context)->nfc;
1087    length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1088    if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1089        log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1090                "or the Decomposition_Mapping is different (%s)\n",
1091                c, length, dmLength, u_errorName(*pErrorCode));
1092        return;
1093    }
1094    /* recompose */
1095    if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1096        UChar32 a, b, composite;
1097        i=0;
1098        U16_NEXT(dm, i, dmLength, a);
1099        U16_NEXT(dm, i, dmLength, b);
1100        /* i==dmLength */
1101        composite=unorm2_composePair(nfc, a, b);
1102        if(composite!=c) {
1103            log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1104                    (long)c, (long)a, (long)b, (long)composite);
1105        }
1106        /*
1107         * Note: NFKC has fewer round-trip mappings than NFC,
1108         * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1109         */
1110    }
1111#endif
1112
1113    /* get ISO Comment, field 11 */
1114    *fields[11][1]=0;
1115    i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1116    if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1117        log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1118            c, u_errorName(*pErrorCode),
1119            U_FAILURE(*pErrorCode) ? buffer : "[error]",
1120            fields[11][0]);
1121    }
1122
1123    /* get uppercase mapping, field 12 */
1124    if(fields[12][0]!=fields[12][1]) {
1125        value=strtoul(fields[12][0], &end, 16);
1126        if(end!=fields[12][1]) {
1127            log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1128            return;
1129        }
1130        if((UChar32)value!=u_toupper(c)) {
1131            log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1132        }
1133    } else {
1134        /* no case mapping: the API must map the code point to itself */
1135        if(c!=u_toupper(c)) {
1136            log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1137        }
1138    }
1139
1140    /* get lowercase mapping, field 13 */
1141    if(fields[13][0]!=fields[13][1]) {
1142        value=strtoul(fields[13][0], &end, 16);
1143        if(end!=fields[13][1]) {
1144            log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1145            return;
1146        }
1147        if((UChar32)value!=u_tolower(c)) {
1148            log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1149        }
1150    } else {
1151        /* no case mapping: the API must map the code point to itself */
1152        if(c!=u_tolower(c)) {
1153            log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1154        }
1155    }
1156
1157    /* get titlecase mapping, field 14 */
1158    if(fields[14][0]!=fields[14][1]) {
1159        value=strtoul(fields[14][0], &end, 16);
1160        if(end!=fields[14][1]) {
1161            log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1162            return;
1163        }
1164        if((UChar32)value!=u_totitle(c)) {
1165            log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1166        }
1167    } else {
1168        /* no case mapping: the API must map the code point to itself */
1169        if(c!=u_totitle(c)) {
1170            log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1171        }
1172    }
1173}
1174
1175static UBool U_CALLCONV
1176enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1177    static const UChar32 test[][2]={
1178        {0x41, U_UPPERCASE_LETTER},
1179        {0x308, U_NON_SPACING_MARK},
1180        {0xfffe, U_GENERAL_OTHER_TYPES},
1181        {0xe0041, U_FORMAT_CHAR},
1182        {0xeffff, U_UNASSIGNED}
1183    };
1184
1185    int32_t i, count;
1186
1187    if(0!=strcmp((const char *)context, "a1")) {
1188        log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1189        return FALSE;
1190    }
1191
1192    count=UPRV_LENGTHOF(test);
1193    for(i=0; i<count; ++i) {
1194        if(start<=test[i][0] && test[i][0]<limit) {
1195            if(type!=(UCharCategory)test[i][1]) {
1196                log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1197                        start, limit, (long)type, test[i][0], test[i][1]);
1198            }
1199            /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1200            return i==(count-1) ? FALSE : TRUE;
1201        }
1202    }
1203
1204    if(start>test[count-1][0]) {
1205        log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1206                start, limit, (long)type);
1207        return FALSE;
1208    }
1209
1210    return TRUE;
1211}
1212
1213static UBool U_CALLCONV
1214enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1215    /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1216    static const int32_t defaultBidi[][2]={ /* { limit, class } */
1217        { 0x0590, U_LEFT_TO_RIGHT },
1218        { 0x0600, U_RIGHT_TO_LEFT },
1219        { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1220        { 0x08A0, U_RIGHT_TO_LEFT },
1221        { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1222        { 0x20A0, U_LEFT_TO_RIGHT },
1223        { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1224        { 0xFB1D, U_LEFT_TO_RIGHT },
1225        { 0xFB50, U_RIGHT_TO_LEFT },
1226        { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1227        { 0xFE70, U_LEFT_TO_RIGHT },
1228        { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1229        { 0x10800, U_LEFT_TO_RIGHT },
1230        { 0x11000, U_RIGHT_TO_LEFT },
1231        { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1232        { 0x1EE00, U_RIGHT_TO_LEFT },
1233        { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1234        { 0x1F000, U_RIGHT_TO_LEFT },
1235        { 0x110000, U_LEFT_TO_RIGHT }
1236    };
1237
1238    UChar32 c;
1239    int32_t i;
1240    UCharDirection shouldBeDir;
1241
1242    /*
1243     * LineBreak.txt specifies:
1244     *   #  - Assigned characters that are not listed explicitly are given the value
1245     *   #    "AL".
1246     *   #  - Unassigned characters are given the value "XX".
1247     *
1248     * PUA characters are listed explicitly with "XX".
1249     * Verify that no assigned character has "XX".
1250     */
1251    if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1252        c=start;
1253        while(c<limit) {
1254            if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1255                log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1256            }
1257            ++c;
1258        }
1259    }
1260
1261    /*
1262     * Verify default Bidi classes.
1263     * For recent Unicode versions, see UCD.html.
1264     *
1265     * For older Unicode versions:
1266     * See table 3-7 "Bidirectional Character Types" in UAX #9.
1267     * http://www.unicode.org/reports/tr9/
1268     *
1269     * See also DerivedBidiClass.txt for Cn code points!
1270     *
1271     * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1272     * changed some default values.
1273     * In particular, non-characters and unassigned Default Ignorable Code Points
1274     * change from L to BN.
1275     *
1276     * UCD.html version 4.0.1 does not yet reflect these changes.
1277     */
1278    if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1279        /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1280        c=start;
1281        for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {
1282            if((int32_t)c<defaultBidi[i][0]) {
1283                while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1284                    if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1285                        shouldBeDir=U_BOUNDARY_NEUTRAL;
1286                    } else {
1287                        shouldBeDir=(UCharDirection)defaultBidi[i][1];
1288                    }
1289
1290                    if( u_charDirection(c)!=shouldBeDir ||
1291                        u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1292                    ) {
1293                        log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1294                            c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1295                    }
1296                    ++c;
1297                }
1298            }
1299        }
1300    }
1301
1302    return TRUE;
1303}
1304
1305/* tests for several properties */
1306static void TestUnicodeData()
1307{
1308    UVersionInfo expectVersionArray;
1309    UVersionInfo versionArray;
1310    char *fields[15][2];
1311    UErrorCode errorCode;
1312    UChar32 c;
1313    int8_t type;
1314
1315    UnicodeDataContext context;
1316
1317    u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1318    u_getUnicodeVersion(versionArray);
1319    if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1320    {
1321        log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1322        versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1323    }
1324
1325#if defined(ICU_UNICODE_VERSION)
1326    /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1327    if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1328    {
1329         log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1330    }
1331#endif
1332
1333    if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1334        log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1335    }
1336
1337    errorCode=U_ZERO_ERROR;
1338#if !UCONFIG_NO_NORMALIZATION
1339    context.nfc=unorm2_getNFCInstance(&errorCode);
1340    context.nfkc=unorm2_getNFKCInstance(&errorCode);
1341    if(U_FAILURE(errorCode)) {
1342        log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1343        return;
1344    }
1345#endif
1346    parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1347    if(U_FAILURE(errorCode)) {
1348        return; /* if we couldn't parse UnicodeData.txt, we should return */
1349    }
1350
1351    /* sanity check on repeated properties */
1352    for(c=0xfffe; c<=0x10ffff;) {
1353        type=u_charType(c);
1354        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1355            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1356        }
1357        if(type!=U_UNASSIGNED) {
1358            log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1359        }
1360        if((c&0xffff)==0xfffe) {
1361            ++c;
1362        } else {
1363            c+=0xffff;
1364        }
1365    }
1366
1367    /* test that PUA is not "unassigned" */
1368    for(c=0xe000; c<=0x10fffd;) {
1369        type=u_charType(c);
1370        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1371            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1372        }
1373        if(type==U_UNASSIGNED) {
1374            log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1375        } else if(type!=U_PRIVATE_USE_CHAR) {
1376            log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1377        }
1378        if(c==0xf8ff) {
1379            c=0xf0000;
1380        } else if(c==0xffffd) {
1381            c=0x100000;
1382        } else {
1383            ++c;
1384        }
1385    }
1386
1387    /* test u_enumCharTypes() */
1388    u_enumCharTypes(enumTypeRange, "a1");
1389
1390    /* check default properties */
1391    u_enumCharTypes(enumDefaultsRange, NULL);
1392}
1393
1394static void TestCodeUnit(){
1395    const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1396
1397    int32_t i;
1398
1399    for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1400        UChar c=codeunit[i];
1401        if(i<4){
1402            if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1403                log_err("ERROR: U+%04x is a single", c);
1404            }
1405
1406        }
1407        if(i >= 4 && i< 8){
1408            if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1409                log_err("ERROR: U+%04x is a first surrogate", c);
1410            }
1411        }
1412        if(i >= 8 && i< 12){
1413            if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1414                log_err("ERROR: U+%04x is a second surrogate", c);
1415            }
1416        }
1417    }
1418
1419}
1420
1421static void TestCodePoint(){
1422    const UChar32 codePoint[]={
1423        /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1424        0xd800,
1425        0xdbff,
1426        0xdc00,
1427        0xdfff,
1428        0xdc04,
1429        0xd821,
1430        /*not a surrogate, valid, isUnicodeChar , not Error*/
1431        0x20ac,
1432        0xd7ff,
1433        0xe000,
1434        0xe123,
1435        0x0061,
1436        0xe065,
1437        0x20402,
1438        0x24506,
1439        0x23456,
1440        0x20402,
1441        0x10402,
1442        0x23456,
1443        /*not a surrogate, not valid, isUnicodeChar, isError */
1444        0x0015,
1445        0x009f,
1446        /*not a surrogate, not valid, not isUnicodeChar, isError */
1447        0xffff,
1448        0xfffe,
1449    };
1450    int32_t i;
1451    for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1452        UChar32 c=codePoint[i];
1453        if(i<6){
1454            if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1455                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1456            }
1457            if(UTF_IS_VALID(c)){
1458                log_err("ERROR: isValid() failed for U+%04x\n", c);
1459            }
1460            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1461                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1462            }
1463            if(UTF_IS_ERROR(c)){
1464                log_err("ERROR: isError() failed for U+%04x\n", c);
1465            }
1466        }else if(i >=6 && i<18){
1467            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1468                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1469            }
1470            if(!UTF_IS_VALID(c)){
1471                log_err("ERROR: isValid() failed for U+%04x\n", c);
1472            }
1473            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1474                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1475            }
1476            if(UTF_IS_ERROR(c)){
1477                log_err("ERROR: isError() failed for U+%04x\n", c);
1478            }
1479        }else if(i >=18 && i<20){
1480            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1481                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1482            }
1483            if(UTF_IS_VALID(c)){
1484                log_err("ERROR: isValid() failed for U+%04x\n", c);
1485            }
1486            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1487                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1488            }
1489            if(!UTF_IS_ERROR(c)){
1490                log_err("ERROR: isError() failed for U+%04x\n", c);
1491            }
1492        }
1493        else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1494            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1495                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1496            }
1497            if(UTF_IS_VALID(c)){
1498                log_err("ERROR: isValid() failed for U+%04x\n", c);
1499            }
1500            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1501                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1502            }
1503            if(!UTF_IS_ERROR(c)){
1504                log_err("ERROR: isError() failed for U+%04x\n", c);
1505            }
1506        }
1507    }
1508
1509    if(
1510        !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1511        !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1512        U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1513        U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1514    ) {
1515        log_err("error with U_IS_BMP()\n");
1516    }
1517
1518    if(
1519        U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1520        U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1521        U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1522        !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1523    ) {
1524        log_err("error with U_IS_SUPPLEMENTARY()\n");
1525    }
1526}
1527
1528static void TestCharLength()
1529{
1530    const int32_t codepoint[]={
1531        1, 0x0061,
1532        1, 0xe065,
1533        1, 0x20ac,
1534        2, 0x20402,
1535        2, 0x23456,
1536        2, 0x24506,
1537        2, 0x20402,
1538        2, 0x10402,
1539        1, 0xd7ff,
1540        1, 0xe000
1541    };
1542
1543    int32_t i;
1544    UBool multiple;
1545    for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1546        UChar32 c=codepoint[i+1];
1547        if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1548            log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1549        }
1550        multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1551        if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1552            log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1553        }
1554    }
1555}
1556
1557/*internal functions ----*/
1558static int32_t MakeProp(char* str)
1559{
1560    int32_t result = 0;
1561    char* matchPosition =0;
1562
1563    matchPosition = strstr(tagStrings, str);
1564    if (matchPosition == 0)
1565    {
1566        log_err("unrecognized type letter ");
1567        log_err(str);
1568    }
1569    else
1570        result = (int32_t)((matchPosition - tagStrings) / 2);
1571    return result;
1572}
1573
1574static int32_t MakeDir(char* str)
1575{
1576    int32_t pos = 0;
1577    for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
1578        if (strcmp(str, dirStrings[pos]) == 0) {
1579            return pos;
1580        }
1581    }
1582    return -1;
1583}
1584
1585/* test u_charName() -------------------------------------------------------- */
1586
/*
 * Expected character names for a sample of code points.
 * enumCharNamesFn() looks up enumerated code points in this table and compares
 * the enumerated name with name, oldName or extName depending on the name choice.
 *
 * Rows with fewer than five initializers leave the trailing members
 * zero-initialized, so alias is NULL wherever it is not listed.
 */
static const struct {
    /* the code point that the names belong to */
    uint32_t code;
    /*
     * name:    expected for U_UNICODE_CHAR_NAME
     * oldName: expected for U_UNICODE_10_CHAR_NAME (empty in every row here)
     * extName: expected for U_EXTENDED_CHAR_NAME
     * alias:   NOTE(review): presumably the expected name alias - confirm where it is used
     */
    const char *name, *oldName, *extName, *alias;
} names[]={
    {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
    {0x01a2, "LATIN CAPITAL LETTER OI", "",
             "LATIN CAPITAL LETTER OI",
             "LATIN CAPITAL LETTER GHA"},
    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
             "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
    {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
             "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
             "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
    {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
    {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
    {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
    {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
    /* surrogates and noncharacters have no regular name, only an extended one */
    {0xd800, "", "", "<lead surrogate-D800>" },
    {0xdc00, "", "", "<trail surrogate-DC00>" },
    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" },
    {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
    {0xffff, "", "", "<noncharacter-FFFF>" },
    {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
              "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
              "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
    {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
};
1614
1615static UBool
1616enumCharNamesFn(void *context,
1617                UChar32 code, UCharNameChoice nameChoice,
1618                const char *name, int32_t length) {
1619    int32_t *pCount=(int32_t *)context;
1620    const char *expected;
1621    int i;
1622
1623    if(length<=0 || length!=(int32_t)strlen(name)) {
1624        /* should not be called with an empty string or invalid length */
1625        log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1626        return TRUE;
1627    }
1628
1629    ++*pCount;
1630    for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1631        if(code==(UChar32)names[i].code) {
1632            switch (nameChoice) {
1633                case U_EXTENDED_CHAR_NAME:
1634                    if(0!=strcmp(name, names[i].extName)) {
1635                        log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1636                    }
1637                    break;
1638                case U_UNICODE_CHAR_NAME:
1639                    if(0!=strcmp(name, names[i].name)) {
1640                        log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1641                    }
1642                    break;
1643                case U_UNICODE_10_CHAR_NAME:
1644                    expected=names[i].oldName;
1645                    if(expected[0]==0 || 0!=strcmp(name, expected)) {
1646                        log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1647                    }
1648                    break;
1649                case U_CHAR_NAME_ALIAS:
1650                    expected=names[i].alias;
1651                    if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1652                        log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1653                    }
1654                    break;
1655                case U_CHAR_NAME_CHOICE_COUNT:
1656                    break;
1657            }
1658            break;
1659        }
1660    }
1661    return TRUE;
1662}
1663
/* State that enumExtCharNamesFn() receives through its context pointer. */
struct enumExtCharNamesContext {
    /* Counter handed on to enumCharNamesFn(), which increments it for each name it accepts. */
    uint32_t length;
    /* Code point from the previous enumExtCharNamesFn() call; TestCharNames() starts it at -1. */
    int32_t last;
};
1668
1669static UBool
1670enumExtCharNamesFn(void *context,
1671                UChar32 code, UCharNameChoice nameChoice,
1672                const char *name, int32_t length) {
1673    struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1674
1675    if (ecncp->last != (int32_t) code - 1) {
1676        if (ecncp->last < 0) {
1677            log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1678        } else {
1679            log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1680        }
1681    }
1682    ecncp->last = (int32_t) code;
1683
1684    if (!*name) {
1685        log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1686    }
1687
1688    return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1689}
1690
1691/**
1692 * This can be made more efficient by moving it into putil.c and having
1693 * it directly access the ebcdic translation tables.
1694 * TODO: If we get this method in putil.c, then delete it from here.
1695 */
1696static UChar
1697u_charToUChar(char c) {
1698    UChar uc;
1699    u_charsToUChars(&c, &uc, 1);
1700    return uc;
1701}
1702
1703static void
1704TestCharNames() {
1705    static char name[80];
1706    UErrorCode errorCode=U_ZERO_ERROR;
1707    struct enumExtCharNamesContext extContext;
1708    const char *expected;
1709    int32_t length;
1710    UChar32 c;
1711    int32_t i;
1712
1713    log_verbose("Testing uprv_getMaxCharNameLength()\n");
1714    length=uprv_getMaxCharNameLength();
1715    if(length==0) {
1716        /* no names data available */
1717        return;
1718    }
1719    if(length<83) { /* Unicode 3.2 max char name length */
1720        log_err("uprv_getMaxCharNameLength()=%d is too short");
1721    }
1722    /* ### TODO same tests for max ISO comment length as for max name length */
1723
1724    log_verbose("Testing u_charName()\n");
1725    for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1726        /* modern Unicode character name */
1727        length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1728        if(U_FAILURE(errorCode)) {
1729            log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1730            return;
1731        }
1732        if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1733            log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1734        }
1735
1736        /* find the modern name */
1737        if (*names[i].name) {
1738            c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1739            if(U_FAILURE(errorCode)) {
1740                log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1741                return;
1742            }
1743            if(c!=(UChar32)names[i].code) {
1744                log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1745            }
1746        }
1747
1748        /* Unicode 1.0 character name */
1749        length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1750        if(U_FAILURE(errorCode)) {
1751            log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1752            return;
1753        }
1754        if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1755            log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1756        }
1757
1758        /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1759        if(names[i].oldName[0]!=0 /* && length>0 */) {
1760            c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1761            if(U_FAILURE(errorCode)) {
1762                log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1763                return;
1764            }
1765            if(c!=(UChar32)names[i].code) {
1766                log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1767            }
1768        }
1769
1770        /* Unicode character name alias */
1771        length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1772        if(U_FAILURE(errorCode)) {
1773            log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1774            return;
1775        }
1776        expected=names[i].alias;
1777        if(expected==NULL) {
1778            expected="";
1779        }
1780        if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1781            log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1782                    names[i].code, name, length, expected);
1783        }
1784
1785        /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1786        if(expected[0]!=0 /* && length>0 */) {
1787            c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1788            if(U_FAILURE(errorCode)) {
1789                log_err("u_charFromName(%s - alias) error %s\n",
1790                        expected, u_errorName(errorCode));
1791                return;
1792            }
1793            if(c!=(UChar32)names[i].code) {
1794                log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1795                        expected, c, names[i].code);
1796            }
1797        }
1798    }
1799
1800    /* test u_enumCharNames() */
1801    length=0;
1802    errorCode=U_ZERO_ERROR;
1803    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1804    if(U_FAILURE(errorCode) || length<94140) {
1805        log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1806    }
1807
1808    extContext.length = 0;
1809    extContext.last = -1;
1810    errorCode=U_ZERO_ERROR;
1811    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1812    if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1813        log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1814    }
1815
1816    /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1817    if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1818        log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1819    }
1820
1821    /* Test getCharNameCharacters */
1822    if(!getTestOption(QUICK_OPTION)) {
1823        enum { BUFSIZE = 256 };
1824        UErrorCode ec = U_ZERO_ERROR;
1825        char buf[BUFSIZE];
1826        int32_t maxLength;
1827        UChar32 cp;
1828        UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1829        int32_t l1, l2;
1830        UBool map[256];
1831        UBool ok;
1832
1833        USet* set = uset_open(1, 0); /* empty set */
1834        USet* dumb = uset_open(1, 0); /* empty set */
1835
1836        /*
1837         * uprv_getCharNameCharacters() will likely return more lowercase
1838         * letters than actual character names contain because
1839         * it includes all the characters in lowercased names of
1840         * general categories, for the full possible set of extended names.
1841         */
1842        {
1843            USetAdder sa={
1844                NULL,
1845                uset_add,
1846                uset_addRange,
1847                uset_addString,
1848                NULL /* don't need remove() */
1849            };
1850            sa.set=set;
1851            uprv_getCharNameCharacters(&sa);
1852        }
1853
1854        /* build set the dumb (but sure-fire) way */
1855        for (i=0; i<256; ++i) {
1856            map[i] = FALSE;
1857        }
1858
1859        maxLength=0;
1860        for (cp=0; cp<0x110000; ++cp) {
1861            int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1862                                     buf, BUFSIZE, &ec);
1863            if (U_FAILURE(ec)) {
1864                log_err("FAIL: u_charName failed when it shouldn't\n");
1865                uset_close(set);
1866                uset_close(dumb);
1867                return;
1868            }
1869            if(len>maxLength) {
1870                maxLength=len;
1871            }
1872
1873            for (i=0; i<len; ++i) {
1874                if (!map[(uint8_t) buf[i]]) {
1875                    uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1876                    map[(uint8_t) buf[i]] = TRUE;
1877                }
1878            }
1879
1880            /* test for leading/trailing whitespace */
1881            if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1882                log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1883            }
1884        }
1885
1886        if(map[(uint8_t)'\t']) {
1887            log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1888        }
1889
1890        length=uprv_getMaxCharNameLength();
1891        if(length!=maxLength) {
1892            log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1893                    length, maxLength);
1894        }
1895
1896        /* compare the sets.  Where is my uset_equals?!! */
1897        ok=TRUE;
1898        for(i=0; i<256; ++i) {
1899            if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1900                if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1901                    /* ignore lowercase a-z that are in set but not in dumb */
1902                    ok=TRUE;
1903                } else {
1904                    ok=FALSE;
1905                    break;
1906                }
1907            }
1908        }
1909
1910        l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1911        l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1912        if (U_FAILURE(ec)) {
1913            log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1914            uset_close(set);
1915            uset_close(dumb);
1916            return;
1917        }
1918
1919        if (l1 >= BUFSIZE) {
1920            l1 = BUFSIZE-1;
1921            pat[l1] = 0;
1922        }
1923        if (l2 >= BUFSIZE) {
1924            l2 = BUFSIZE-1;
1925            dumbPat[l2] = 0;
1926        }
1927
1928        if (!ok) {
1929            log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1930                    aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1931        } else if(getTestOption(VERBOSITY_OPTION)) {
1932            log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1933        }
1934
1935        uset_close(set);
1936        uset_close(dumb);
1937    }
1938
1939    /* ### TODO: test error cases and other interesting things */
1940}
1941
1942static void
1943TestUCharFromNameUnderflow() {
1944    // Ticket #10889: Underflow crash when there is no dash.
1945    UErrorCode errorCode=U_ZERO_ERROR;
1946    UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode);
1947    if(U_SUCCESS(errorCode)) {
1948        log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1949    }
1950
1951    // Test related edge cases.
1952    errorCode=U_ZERO_ERROR;
1953    c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode);
1954    if(U_SUCCESS(errorCode)) {
1955        log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1956    }
1957
1958    errorCode=U_ZERO_ERROR;
1959    c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode);
1960    if(U_SUCCESS(errorCode)) {
1961        log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1962    }
1963
1964    errorCode=U_ZERO_ERROR;
1965    c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode);
1966    if(U_SUCCESS(errorCode)) {
1967        log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode));
1968    }
1969}
1970
1971/* test u_isMirrored() and u_charMirror() ----------------------------------- */
1972
1973static void
1974TestMirroring() {
1975    USet *set;
1976    UErrorCode errorCode;
1977
1978    UChar32 start, end, c2, c3;
1979    int32_t i;
1980
1981    U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1982
1983    U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1984
1985    log_verbose("Testing u_isMirrored()\n");
1986    if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1987         !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1988        )
1989    ) {
1990        log_err("u_isMirrored() does not work correctly\n");
1991    }
1992
1993    log_verbose("Testing u_charMirror()\n");
1994    if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1995         u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1996         u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1997         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1998         u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1999         )
2000    ) {
2001        log_err("u_charMirror() does not work correctly\n");
2002    }
2003
2004    /* verify that Bidi_Mirroring_Glyph roundtrips */
2005    errorCode=U_ZERO_ERROR;
2006    set=uset_openPattern(mirroredPattern, 17, &errorCode);
2007
2008    if (U_FAILURE(errorCode)) {
2009        log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
2010    } else {
2011        for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
2012            do {
2013                c2=u_charMirror(start);
2014                c3=u_charMirror(c2);
2015                if(c3!=start) {
2016                    log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
2017                }
2018                c3=u_getBidiPairedBracket(start);
2019                if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
2020                    if(c3!=start) {
2021                        log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
2022                                (long)start);
2023                    }
2024                } else {
2025                    if(c3!=c2) {
2026                        log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
2027                                (long)start, (long)c2);
2028                    }
2029                }
2030            } while(++start<=end);
2031        }
2032    }
2033
2034    uset_close(set);
2035}
2036
2037
2038struct RunTestData
2039{
2040    const char *runText;
2041    UScriptCode runCode;
2042};
2043
2044typedef struct RunTestData RunTestData;
2045
2046static void
2047CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2048                const char *prefix)
2049{
2050    int32_t run, runStart, runLimit;
2051    UScriptCode runCode;
2052
2053    /* iterate over all the runs */
2054    run = 0;
2055    while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2056        if (runStart != runStarts[run]) {
2057            log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2058                prefix, run, runStarts[run], runStart);
2059        }
2060
2061        if (runLimit != runStarts[run + 1]) {
2062            log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2063                prefix, run, runStarts[run + 1], runLimit);
2064        }
2065
2066        if (runCode != testData[run].runCode) {
2067            log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2068                prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2069        }
2070
2071        run += 1;
2072
2073        /* stop when we've seen all the runs we expect to see */
2074        if (run >= nRuns) {
2075            break;
2076        }
2077    }
2078
2079    /* Complain if we didn't see then number of runs we expected */
2080    if (run != nRuns) {
2081        log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2082    }
2083}
2084
2085static void
2086TestUScriptRunAPI()
2087{
2088    static const RunTestData testData1[] = {
2089        {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2090        {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2091        {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2092        {"English (", USCRIPT_LATIN},
2093        {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2094        {") ", USCRIPT_LATIN},
2095        {"\\u6F22\\u5B75", USCRIPT_HAN},
2096        {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2097        {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2098        {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2099    };
2100
2101    static const RunTestData testData2[] = {
2102       {"((((((((((abc))))))))))", USCRIPT_LATIN}
2103    };
2104
2105    static const struct {
2106      const RunTestData *testData;
2107      int32_t nRuns;
2108    } testDataEntries[] = {
2109        {testData1, UPRV_LENGTHOF(testData1)},
2110        {testData2, UPRV_LENGTHOF(testData2)}
2111    };
2112
2113    static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);
2114    int32_t testEntry;
2115
2116    for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2117        UChar testString[1024];
2118        int32_t runStarts[256];
2119        int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2120        const RunTestData *testData = testDataEntries[testEntry].testData;
2121
2122        int32_t run, stringLimit;
2123        UScriptRun *scriptRun = NULL;
2124        UErrorCode err;
2125
2126        /*
2127         * Fill in the test string and the runStarts array.
2128         */
2129        stringLimit = 0;
2130        for (run = 0; run < nTestRuns; run += 1) {
2131            runStarts[run] = stringLimit;
2132            stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2133            /*stringLimit -= 1;*/
2134        }
2135
2136        /* The limit of the last run */
2137        runStarts[nTestRuns] = stringLimit;
2138
2139        /*
2140         * Make sure that calling uscript_OpenRun with a NULL text pointer
2141         * and a non-zero text length returns the correct error.
2142         */
2143        err = U_ZERO_ERROR;
2144        scriptRun = uscript_openRun(NULL, stringLimit, &err);
2145
2146        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2147            log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2148        }
2149
2150        if (scriptRun != NULL) {
2151            log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2152            uscript_closeRun(scriptRun);
2153        }
2154
2155        /*
2156         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2157         * and a zero text length returns the correct error.
2158         */
2159        err = U_ZERO_ERROR;
2160        scriptRun = uscript_openRun(testString, 0, &err);
2161
2162        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2163            log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2164        }
2165
2166        if (scriptRun != NULL) {
2167            log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2168            uscript_closeRun(scriptRun);
2169        }
2170
2171        /*
2172         * Make sure that calling uscript_openRun with a NULL text pointer
2173         * and a zero text length doesn't return an error.
2174         */
2175        err = U_ZERO_ERROR;
2176        scriptRun = uscript_openRun(NULL, 0, &err);
2177
2178        if (U_FAILURE(err)) {
2179            log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2180        }
2181
2182        /* Make sure that the empty iterator doesn't find any runs */
2183        if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2184            log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2185        }
2186
2187        /*
2188         * Make sure that calling uscript_setRunText with a NULL text pointer
2189         * and a non-zero text length returns the correct error.
2190         */
2191        err = U_ZERO_ERROR;
2192        uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2193
2194        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2195            log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2196        }
2197
2198        /*
2199         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2200         * and a zero text length returns the correct error.
2201         */
2202        err = U_ZERO_ERROR;
2203        uscript_setRunText(scriptRun, testString, 0, &err);
2204
2205        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2206            log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2207        }
2208
2209        /*
2210         * Now call uscript_setRunText on the empty iterator
2211         * and make sure that it works.
2212         */
2213        err = U_ZERO_ERROR;
2214        uscript_setRunText(scriptRun, testString, stringLimit, &err);
2215
2216        if (U_FAILURE(err)) {
2217            log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2218        } else {
2219            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2220        }
2221
2222        uscript_closeRun(scriptRun);
2223
2224        /*
2225         * Now open an interator over the testString
2226         * using uscript_openRun and make sure that it works
2227         */
2228        scriptRun = uscript_openRun(testString, stringLimit, &err);
2229
2230        if (U_FAILURE(err)) {
2231            log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2232        } else {
2233            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2234        }
2235
2236        /* Now reset the iterator, and make sure
2237         * that it still works.
2238         */
2239        uscript_resetRun(scriptRun);
2240
2241        CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2242
2243        /* Close the iterator */
2244        uscript_closeRun(scriptRun);
2245    }
2246}
2247
2248/* test additional, non-core properties */
2249static void
2250TestAdditionalProperties() {
2251    /* test data for u_charAge() */
2252    static const struct {
2253        UChar32 c;
2254        UVersionInfo version;
2255    } charAges[]={
2256        {0x41,    { 1, 1, 0, 0 }},
2257        {0xffff,  { 1, 1, 0, 0 }},
2258        {0x20ab,  { 2, 0, 0, 0 }},
2259        {0x2fffe, { 2, 0, 0, 0 }},
2260        {0x20ac,  { 2, 1, 0, 0 }},
2261        {0xfb1d,  { 3, 0, 0, 0 }},
2262        {0x3f4,   { 3, 1, 0, 0 }},
2263        {0x10300, { 3, 1, 0, 0 }},
2264        {0x220,   { 3, 2, 0, 0 }},
2265        {0xff60,  { 3, 2, 0, 0 }}
2266    };
2267
2268    /* test data for u_hasBinaryProperty() */
2269    static const int32_t
2270    props[][3]={ /* code point, property, value */
2271        { 0x0627, UCHAR_ALPHABETIC, TRUE },
2272        { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2273        { 0x2028, UCHAR_ALPHABETIC, FALSE },
2274
2275        { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2276        { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2277
2278        { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2279        { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2280
2281        { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2282        { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2283
2284        /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2285        { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2286        { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2287        { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2288        { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2289
2290        { 0x058a, UCHAR_DASH, TRUE },
2291        { 0x007e, UCHAR_DASH, FALSE },
2292
2293        { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2294        { 0x3000, UCHAR_DIACRITIC, FALSE },
2295
2296        { 0x0e46, UCHAR_EXTENDER, TRUE },
2297        { 0x0020, UCHAR_EXTENDER, FALSE },
2298
2299#if !UCONFIG_NO_NORMALIZATION
2300        { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2301        { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2302        { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2303
2304        { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2305        { 0x0308, UCHAR_NFD_INERT, FALSE },
2306
2307        { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2308        { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2309
2310        { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2311        { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2312        { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2313        { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2314        { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2315        { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2316
2317        { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2318        { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2319
2320        { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2321        { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2322        { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2323        { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2324        { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2325        { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2326#endif
2327
2328        { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2329        { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2330        { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2331
2332        { 0x30fb, UCHAR_HYPHEN, TRUE },
2333        { 0xfe58, UCHAR_HYPHEN, FALSE },
2334
2335        { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2336        { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2337        { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2338
2339        { 0x2172, UCHAR_ID_START, TRUE },
2340        { 0x007a, UCHAR_ID_START, TRUE },
2341        { 0x0039, UCHAR_ID_START, FALSE },
2342
2343        { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2344        { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2345        { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2346
2347        { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2348        { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2349
2350        { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2351        { 0x0345, UCHAR_LOWERCASE, TRUE },
2352        { 0x0030, UCHAR_LOWERCASE, FALSE },
2353
2354        { 0x1d7a9, UCHAR_MATH, TRUE },
2355        { 0x2135, UCHAR_MATH, TRUE },
2356        { 0x0062, UCHAR_MATH, FALSE },
2357
2358        { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2359        { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2360        { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2361
2362        { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2363        { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2364        { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2365
2366        { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2367        { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2368
2369        { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2370        { 0x2162, UCHAR_UPPERCASE, TRUE },
2371        { 0x0345, UCHAR_UPPERCASE, FALSE },
2372
2373        { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2374        { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2375        { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2376
2377        { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2378        { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2379        { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2380
2381        { 0x16ee, UCHAR_XID_START, TRUE },
2382        { 0x23456, UCHAR_XID_START, TRUE },
2383        { 0x1d1aa, UCHAR_XID_START, FALSE },
2384
2385        /*
2386         * Version break:
2387         * The following properties are only supported starting with the
2388         * Unicode version indicated in the second field.
2389         */
2390        { -1, 0x320, 0 },
2391
2392        { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2393        { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2394        { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2395
2396        { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2397        { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2398        { 0xe0001, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2399        { 0xe0100, UCHAR_DEPRECATED, FALSE },
2400
2401        { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2402        { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2403        { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2404        { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2405
2406        { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2407        { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2408        { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2409        { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2410
2411        { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2412        { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2413
2414        { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2415        { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2416
2417        { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2418        { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2419
2420        { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2421        { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2422
2423        { 0x2e9b, UCHAR_RADICAL, TRUE },
2424        { 0x4e00, UCHAR_RADICAL, FALSE },
2425
2426        { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2427        { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2428
2429        { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2430        { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2431
2432        { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2433
2434        { 0x002e, UCHAR_S_TERM, TRUE },
2435        { 0x0061, UCHAR_S_TERM, FALSE },
2436
2437        { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2438        { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2439        { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2440        { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2441
2442        /* enum/integer type properties */
2443
2444        /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2445        /* test default Bidi classes for unassigned code points */
2446        { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2447        { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2448        { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2449        { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2450        { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2451        { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2452        { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2453        { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2454        { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2455        { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2456        { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2457
2458        { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2459        { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2460        { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2461        { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2462        { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2463        { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2464        { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2465
2466        { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2467        { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2468        { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2469        { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2470        { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2471        { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2472        { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2473        { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2474        { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2475        { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2476        { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2477
2478        /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2479        { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2480
2481        { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2482        { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2483        { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2484        { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2485        { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2486        { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2487        { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2488        { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2489        { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2490
2491        { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2492        { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2493        { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2494        { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2495        { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2496        { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2497        { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2498        { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2499        { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2500        { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2501        { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2502        { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2503        { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2504        { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2505        { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2506        { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2507        { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2508
2509        /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2510        { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2511        { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2512
2513        { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2514        { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2515        { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2516        { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2517        { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2518
2519        { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2520        { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2521        { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2522        { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2523        { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2524        { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2525        { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2526        { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2527
2528        /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2529        { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2530        { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2531        { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2532        { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2533        { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2534        { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2535        { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2536        { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2537        { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2538        { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2539        { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2540        { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2541        { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2542        { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2543        { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2544
2545        /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2546
2547        /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2548
2549        { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2550        { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2551        { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2552        { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2553        { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2554        { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2555        { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2556
2557        { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2558        { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2559        { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2560        { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2561
2562        { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2563        { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2564        { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2565        { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2566        { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2567        { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2568
2569        { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2570        { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2571        { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2572        { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2573
2574        { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2575        { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2576        { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2577        { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2578        { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2579        { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2580        { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2581
2582        { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2583        { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2584        { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2585        { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2586
2587        { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2588        { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2589        { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2590        { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2591
2592        { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2593        { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2594        { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2595        { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2596        { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2597
2598        { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2599
2600        { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2601
2602        { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2603        { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2604        { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2605
2606        { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2607        { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2608        { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2609        { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2610        { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2611
2612        { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2613        { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2614        { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2615
2616        { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2617        { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2618        { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2619        { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2620
2621        { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2622        { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2623        { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2624        { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2625        { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2626        { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2627
2628        { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2629        { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2630        { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2631        { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2632
2633        { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2634        { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2635        { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2636        { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2637
2638        { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2639        { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2640        { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2641        { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2642
2643        { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2644
2645        /* unassigned code points in new default Bidi R blocks */
2646        { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2647        { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2648
2649        /* test some script codes >127 */
2650        { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2651        { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2652        { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2653
2654        { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2655
2656        /* value changed in Unicode 6.0 */
2657        { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2658
2659        { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2660
2661        /* unassigned code points in new/changed default Bidi AL blocks */
2662        { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2663        { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2664
2665        { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2666
2667        /* unassigned code points in the currency symbols block now default to ET */
2668        { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2669        { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2670
2671        /* new property in Unicode 6.3 */
2672        { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2673        { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2674        { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2675        { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2676        { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2677        { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2678
2679        { -1, 0x700, 0 }, /* version break for Unicode 7.0 */
2680
2681        /* new character range with Joining_Group values */
2682        { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2683        { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },
2684        { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },
2685        { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },
2686        { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2687
2688        /* undefined UProperty values */
2689        { 0x61, 0x4a7, 0 },
2690        { 0x234bc, 0x15ed, 0 }
2691    };
2692
2693    UVersionInfo version;
2694    UChar32 c;
2695    int32_t i, result, uVersion;
2696    UProperty which;
2697
2698    /* what is our Unicode version? */
2699    u_getUnicodeVersion(version);
2700    uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2701
2702    u_charAge(0x20, version);
2703    if(version[0]==0) {
2704        /* no additional properties available */
2705        log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2706        return;
2707    }
2708
2709    /* test u_charAge() */
2710    for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2711        u_charAge(charAges[i].c, version);
2712        if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2713            log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2714                charAges[i].c,
2715                version[0], version[1], version[2], version[3],
2716                charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2717        }
2718    }
2719
2720    if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2721        u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2722        u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2723        u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2724        u_getIntPropertyMinValue(0x2345)!=0
2725    ) {
2726        log_err("error: u_getIntPropertyMinValue() wrong\n");
2727    }
2728    if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2729        log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2730    }
2731    if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2732        log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2733    }
2734    if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2735        log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2736    }
2737    if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2738        log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2739    }
2740    if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2741        log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2742    }
2743    if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2744        log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2745    }
2746    if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2747        log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2748    }
2749    if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2750        log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2751    }
2752    if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2753        log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2754    }
2755    if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2756        log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2757    }
2758    if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2759        log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2760    }
2761    if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2762        log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2763    }
2764    if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2765        log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2766    }
2767    if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2768        log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2769    }
2770    /*JB#2410*/
2771    if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2772        log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2773    }
2774    if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2775        log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2776    }
2777    if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2778        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2779    }
2780    if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2781        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2782    }
2783    if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2784        log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2785    }
2786
2787    /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2788    for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2789        const char *whichName;
2790
2791        if(props[i][0]<0) {
2792            /* Unicode version break */
2793            if(uVersion<props[i][1]) {
2794                break; /* do not test properties that are not yet supported */
2795            } else {
2796                continue; /* skip this row */
2797            }
2798        }
2799
2800        c=(UChar32)props[i][0];
2801        which=(UProperty)props[i][1];
2802        whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2803
2804        if(which<UCHAR_INT_START) {
2805            result=u_hasBinaryProperty(c, which);
2806            if(result!=props[i][2]) {
2807                log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2808                        c, whichName, result, i);
2809            }
2810        }
2811
2812        result=u_getIntPropertyValue(c, which);
2813        if(result!=props[i][2]) {
2814            log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2815                    c, whichName, result, props[i][2], i);
2816        }
2817
2818        /* test separate functions, too */
2819        switch((UProperty)props[i][1]) {
2820        case UCHAR_ALPHABETIC:
2821            if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2822                log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2823                        props[i][0], result, i);
2824            }
2825            break;
2826        case UCHAR_LOWERCASE:
2827            if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2828                log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2829                        props[i][0], result, i);
2830            }
2831            break;
2832        case UCHAR_UPPERCASE:
2833            if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2834                log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2835                        props[i][0], result, i);
2836            }
2837            break;
2838        case UCHAR_WHITE_SPACE:
2839            if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2840                log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2841                        props[i][0], result, i);
2842            }
2843            break;
2844        default:
2845            break;
2846        }
2847    }
2848}
2849
2850static void
2851TestNumericProperties(void) {
2852    /* see UnicodeData.txt, DerivedNumericValues.txt */
2853    static const struct {
2854        UChar32 c;
2855        int32_t type;
2856        double numValue;
2857    } values[]={
2858        { 0x0F33, U_NT_NUMERIC, -1./2. },
2859        { 0x0C66, U_NT_DECIMAL, 0 },
2860        { 0x96f6, U_NT_NUMERIC, 0 },
2861        { 0xa833, U_NT_NUMERIC, 1./16. },
2862        { 0x2152, U_NT_NUMERIC, 1./10. },
2863        { 0x2151, U_NT_NUMERIC, 1./9. },
2864        { 0x1245f, U_NT_NUMERIC, 1./8. },
2865        { 0x2150, U_NT_NUMERIC, 1./7. },
2866        { 0x2159, U_NT_NUMERIC, 1./6. },
2867        { 0x09f6, U_NT_NUMERIC, 3./16. },
2868        { 0x2155, U_NT_NUMERIC, 1./5. },
2869        { 0x00BD, U_NT_NUMERIC, 1./2. },
2870        { 0x0031, U_NT_DECIMAL, 1. },
2871        { 0x4e00, U_NT_NUMERIC, 1. },
2872        { 0x58f1, U_NT_NUMERIC, 1. },
2873        { 0x10320, U_NT_NUMERIC, 1. },
2874        { 0x0F2B, U_NT_NUMERIC, 3./2. },
2875        { 0x00B2, U_NT_DIGIT, 2. },
2876        { 0x5f10, U_NT_NUMERIC, 2. },
2877        { 0x1813, U_NT_DECIMAL, 3. },
2878        { 0x5f0e, U_NT_NUMERIC, 3. },
2879        { 0x2173, U_NT_NUMERIC, 4. },
2880        { 0x8086, U_NT_NUMERIC, 4. },
2881        { 0x278E, U_NT_DIGIT, 5. },
2882        { 0x1D7F2, U_NT_DECIMAL, 6. },
2883        { 0x247A, U_NT_DIGIT, 7. },
2884        { 0x7396, U_NT_NUMERIC, 9. },
2885        { 0x1372, U_NT_NUMERIC, 10. },
2886        { 0x216B, U_NT_NUMERIC, 12. },
2887        { 0x16EE, U_NT_NUMERIC, 17. },
2888        { 0x249A, U_NT_NUMERIC, 19. },
2889        { 0x303A, U_NT_NUMERIC, 30. },
2890        { 0x5345, U_NT_NUMERIC, 30. },
2891        { 0x32B2, U_NT_NUMERIC, 37. },
2892        { 0x1375, U_NT_NUMERIC, 40. },
2893        { 0x10323, U_NT_NUMERIC, 50. },
2894        { 0x0BF1, U_NT_NUMERIC, 100. },
2895        { 0x964c, U_NT_NUMERIC, 100. },
2896        { 0x217E, U_NT_NUMERIC, 500. },
2897        { 0x2180, U_NT_NUMERIC, 1000. },
2898        { 0x4edf, U_NT_NUMERIC, 1000. },
2899        { 0x2181, U_NT_NUMERIC, 5000. },
2900        { 0x137C, U_NT_NUMERIC, 10000. },
2901        { 0x4e07, U_NT_NUMERIC, 10000. },
2902        { 0x12432, U_NT_NUMERIC, 216000. },
2903        { 0x12433, U_NT_NUMERIC, 432000. },
2904        { 0x4ebf, U_NT_NUMERIC, 100000000. },
2905        { 0x5146, U_NT_NUMERIC, 1000000000000. },
2906        { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2907        { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2908        { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2909        { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2910        { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2911        { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2912        { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2913        { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2914    };
2915
2916    double nv;
2917    UChar32 c;
2918    int32_t i, type;
2919
2920    for(i=0; i<UPRV_LENGTHOF(values); ++i) {
2921        c=values[i].c;
2922        type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2923        nv=u_getNumericValue(c);
2924
2925        if(type!=values[i].type) {
2926            log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2927        }
2928        if(0.000001 <= fabs(nv - values[i].numValue)) {
2929            log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2930        }
2931    }
2932}
2933
2934/**
2935 * Test the property names and property value names API.
2936 */
2937static void
2938TestPropertyNames(void) {
2939    int32_t p, v, choice=0, rev;
2940    UBool atLeastSomething = FALSE;
2941
2942    for (p=0; ; ++p) {
2943        UProperty propEnum = (UProperty)p;
2944        UBool sawProp = FALSE;
2945        if(p > 10 && !atLeastSomething) {
2946          log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2947          return;
2948        }
2949
2950        for (choice=0; ; ++choice) {
2951            const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2952            if (name) {
2953                if (!sawProp)
2954                    log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2955                log_verbose("%d=\"%s\"", choice, name);
2956                sawProp = TRUE;
2957                atLeastSomething = TRUE;
2958
2959                /* test reverse mapping */
2960                rev = u_getPropertyEnum(name);
2961                if (rev != p) {
2962                    log_err("Property round-trip failure: %d -> %s -> %d\n",
2963                            p, name, rev);
2964                }
2965            }
2966            if (!name && choice>0) break;
2967        }
2968        if (sawProp) {
2969            /* looks like a valid property; check the values */
2970            const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2971            int32_t max = 0;
2972            if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2973                max = 255;
2974            } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2975                /* it's far too slow to iterate all the way up to
2976                   the real max, U_GC_P_MASK */
2977                max = U_GC_NL_MASK;
2978            } else if (p == UCHAR_BLOCK) {
2979                /* UBlockCodes, unlike other values, start at 1 */
2980                max = 1;
2981            }
2982            log_verbose("\n");
2983            for (v=-1; ; ++v) {
2984                UBool sawValue = FALSE;
2985                for (choice=0; ; ++choice) {
2986                    const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2987                    if (vname) {
2988                        if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2989                        log_verbose("%d=\"%s\"", choice, vname);
2990                        sawValue = TRUE;
2991
2992                        /* test reverse mapping */
2993                        rev = u_getPropertyValueEnum(propEnum, vname);
2994                        if (rev != v) {
2995                            log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2996                                    pname, v, vname, rev);
2997                        }
2998                    }
2999                    if (!vname && choice>0) break;
3000                }
3001                if (sawValue) {
3002                    log_verbose("\n");
3003                }
3004                if (!sawValue && v>=max) break;
3005            }
3006        }
3007        if (!sawProp) {
3008            if (p>=UCHAR_STRING_LIMIT) {
3009                break;
3010            } else if (p>=UCHAR_DOUBLE_LIMIT) {
3011                p = UCHAR_STRING_START - 1;
3012            } else if (p>=UCHAR_MASK_LIMIT) {
3013                p = UCHAR_DOUBLE_START - 1;
3014            } else if (p>=UCHAR_INT_LIMIT) {
3015                p = UCHAR_MASK_START - 1;
3016            } else if (p>=UCHAR_BINARY_LIMIT) {
3017                p = UCHAR_INT_START - 1;
3018            }
3019        }
3020    }
3021}
3022
3023/**
3024 * Test the property values API.  See JB#2410.
3025 */
3026static void
3027TestPropertyValues(void) {
3028    int32_t i, p, min, max;
3029    UErrorCode ec;
3030
3031    /* Min should be 0 for everything. */
3032    /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
3033    for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
3034        UProperty propEnum = (UProperty)p;
3035        min = u_getIntPropertyMinValue(propEnum);
3036        if (min != 0) {
3037            if (p == UCHAR_BLOCK) {
3038                /* This is okay...for now.  See JB#2487.
3039                   TODO Update this for JB#2487. */
3040            } else {
3041                const char* name;
3042                name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3043                if (name == NULL)
3044                    name = "<ERROR>";
3045                log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3046                        name, min);
3047            }
3048        }
3049    }
3050
3051    if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3052        u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3053        log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3054    }
3055
3056    /* Max should be -1 for invalid properties. */
3057    max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
3058    if (max != -1) {
3059        log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3060                max);
3061    }
3062
3063    /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
3064    for (i=0; i<2; ++i) {
3065        int32_t script;
3066        const char* desc;
3067        ec = U_ZERO_ERROR;
3068        switch (i) {
3069        case 0:
3070            script = uscript_getScript(-1, &ec);
3071            desc = "uscript_getScript(-1)";
3072            break;
3073        case 1:
3074            script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3075            desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3076            break;
3077        default:
3078            log_err("Internal test error. Too many scripts\n");
3079            return;
3080        }
3081        /* We don't explicitly test ec.  It should be U_FAILURE but it
3082           isn't documented as such. */
3083        if (script != (int32_t)USCRIPT_INVALID_CODE) {
3084            log_err("FAIL: %s = %d, exp. 0\n",
3085                    desc, script);
3086        }
3087    }
3088}
3089
3090/* various tests for consistency of UCD data and API behavior */
3091static void
3092TestConsistency() {
3093    char buffer[300];
3094    USet *set1, *set2, *set3, *set4;
3095    UErrorCode errorCode;
3096
3097    UChar32 start, end;
3098    int32_t i, length;
3099
3100    U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3101    U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3102    U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3103    U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3104    U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3105
3106    U_STRING_DECL(mathBlocksPattern,
3107        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3108        214);
3109    U_STRING_DECL(mathPattern, "[:Math:]", 8);
3110    U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3111    U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3112    U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3113
3114    U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3115    U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3116    U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3117    U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3118    U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3119
3120    U_STRING_INIT(mathBlocksPattern,
3121        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3122        214);
3123    U_STRING_INIT(mathPattern, "[:Math:]", 8);
3124    U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3125    U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3126    U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3127
3128    /*
3129     * It used to be that UCD.html and its precursors said
3130     * "Those dashes used to mark connections between pieces of words,
3131     *  plus the Katakana middle dot."
3132     *
3133     * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3134     * but not from Hyphen.
3135     * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3136     * Therefore, do not show errors when testing the Hyphen property.
3137     */
3138    log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3139                "known to the UTC and not considered errors.\n");
3140
3141    errorCode=U_ZERO_ERROR;
3142    set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3143    set2=uset_openPattern(dashPattern, 8, &errorCode);
3144    if(U_SUCCESS(errorCode)) {
3145        /* remove the Katakana middle dot(s) from set1 */
3146        uset_remove(set1, 0x30fb);
3147        uset_remove(set1, 0xff65); /* halfwidth variant */
3148        showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3149    } else {
3150        log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3151    }
3152
3153    /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3154    set3=uset_openPattern(formatPattern, 6, &errorCode);
3155    set4=uset_openPattern(alphaPattern, 14, &errorCode);
3156    if(U_SUCCESS(errorCode)) {
3157        showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3158        showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3159        showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3160    } else {
3161        log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3162    }
3163
3164    uset_close(set1);
3165    uset_close(set2);
3166    uset_close(set3);
3167    uset_close(set4);
3168
3169    /*
3170     * Check that each lowercase character has "small" in its name
3171     * and not "capital".
3172     * There are some such characters, some of which seem odd.
3173     * Use the verbose flag to see these notices.
3174     */
3175    errorCode=U_ZERO_ERROR;
3176    set1=uset_openPattern(lowerPattern, 13, &errorCode);
3177    if(U_SUCCESS(errorCode)) {
3178        for(i=0;; ++i) {
3179            length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3180            if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3181                break; /* done */
3182            }
3183            if(U_FAILURE(errorCode)) {
3184                log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3185                        i, u_errorName(errorCode));
3186                break;
3187            }
3188            if(length!=0) {
3189                break; /* done with code points, got a string or -1 */
3190            }
3191
3192            while(start<=end) {
3193                length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3194                if(U_FAILURE(errorCode)) {
3195                    log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3196                    errorCode=U_ZERO_ERROR;
3197                }
3198                if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3199                    strstr(buffer, "SMALL CAPITAL")==NULL
3200                ) {
3201                    log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3202                }
3203                ++start;
3204            }
3205        }
3206    } else {
3207        log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3208    }
3209    uset_close(set1);
3210
3211    /* verify that all assigned characters in Math blocks are exactly Math characters */
3212    errorCode=U_ZERO_ERROR;
3213    set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3214    set2=uset_openPattern(mathPattern, 8, &errorCode);
3215    set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3216    if(U_SUCCESS(errorCode)) {
3217        uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3218        uset_complement(set3);      /* assigned characters */
3219        uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3220        compareUSets(set1, set2,
3221                     "[assigned Math block chars]", "[math blocks]&[:Math:]",
3222                     TRUE);
3223    } else {
3224        log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3225    }
3226    uset_close(set1);
3227    uset_close(set2);
3228    uset_close(set3);
3229
3230    /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3231    errorCode=U_ZERO_ERROR;
3232    set1=uset_openPattern(unknownPattern, 14, &errorCode);
3233    set2=uset_openPattern(reservedPattern, 20, &errorCode);
3234    if(U_SUCCESS(errorCode)) {
3235        compareUSets(set1, set2,
3236                     "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3237                     TRUE);
3238    } else {
3239        log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3240    }
3241    uset_close(set1);
3242    uset_close(set2);
3243}
3244
3245/*
3246 * Starting with ICU4C 3.4, the core Unicode properties files
3247 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
3248 * are hardcoded in the common DLL and therefore not included
3249 * in the data package any more.
3250 * Test requiring these files are disabled so that
3251 * we need not jump through hoops (like adding snapshots of these files
3252 * to testdata).
3253 * See Jitterbug 4497.
3254 */
3255#define HARDCODED_DATA_4497 1
3256
3257/* API coverage for ucase.c */
3258static void TestUCase() {
3259#if !HARDCODED_DATA_4497
3260    UDataMemory *pData;
3261    UCaseProps *csp;
3262    const UCaseProps *ccsp;
3263    UErrorCode errorCode;
3264
3265    /* coverage for ucase_openBinary() */
3266    errorCode=U_ZERO_ERROR;
3267    pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3268    if(U_FAILURE(errorCode)) {
3269        log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3270                    u_errorName(errorCode));
3271        return;
3272    }
3273
3274    csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3275    if(U_FAILURE(errorCode)) {
3276        log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3277                u_errorName(errorCode));
3278        udata_close(pData);
3279        return;
3280    }
3281
3282    if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3283        log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3284    }
3285
3286    ucase_close(csp);
3287    udata_close(pData);
3288
3289    /* coverage for ucase_getDummy() */
3290    errorCode=U_ZERO_ERROR;
3291    ccsp=ucase_getDummy(&errorCode);
3292    if(ucase_tolower(ccsp, 0x41)!=0x41) {
3293        log_err("ucase_tolower(dummy, A)!=A\n");
3294    }
3295#endif
3296}
3297
3298/* API coverage for ubidi_props.c */
3299static void TestUBiDiProps() {
3300#if !HARDCODED_DATA_4497
3301    UDataMemory *pData;
3302    UBiDiProps *bdp;
3303    const UBiDiProps *cbdp;
3304    UErrorCode errorCode;
3305
3306    /* coverage for ubidi_openBinary() */
3307    errorCode=U_ZERO_ERROR;
3308    pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3309    if(U_FAILURE(errorCode)) {
3310        log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3311                    u_errorName(errorCode));
3312        return;
3313    }
3314
3315    bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3316    if(U_FAILURE(errorCode)) {
3317        log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3318                u_errorName(errorCode));
3319        udata_close(pData);
3320        return;
3321    }
3322
3323    if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3324        log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3325    }
3326
3327    ubidi_closeProps(bdp);
3328    udata_close(pData);
3329
3330    /* coverage for ubidi_getDummy() */
3331    errorCode=U_ZERO_ERROR;
3332    cbdp=ubidi_getDummy(&errorCode);
3333    if(ubidi_getClass(cbdp, 0x20)!=0) {
3334        log_err("ubidi_getClass(dummy, space)!=0\n");
3335    }
3336#endif
3337}
3338
3339/* test case folding, compare return values with CaseFolding.txt ------------ */
3340
/*
 * Bit flags recording which case foldings of a character have already been
 * tested; CF_ALL is the union of the three individual flags.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3348
3349static void
3350testFold(UChar32 c, int which,
3351         UChar32 simple, UChar32 turkic,
3352         const UChar *full, int32_t fullLength,
3353         const UChar *turkicFull, int32_t turkicFullLength) {
3354    UChar s[2], t[32];
3355    UChar32 c2;
3356    int32_t length, length2;
3357
3358    UErrorCode errorCode=U_ZERO_ERROR;
3359
3360    length=0;
3361    U16_APPEND_UNSAFE(s, length, c);
3362
3363    if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3364        log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3365    }
3366    if((which&CF_FULL)!=0) {
3367        length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);
3368        if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3369            log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3370        }
3371    }
3372    if((which&CF_TURKIC)!=0) {
3373        if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3374            log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3375        }
3376
3377        length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3378        if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3379            log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3380        }
3381    }
3382}
3383
3384/* test that c case-folds to itself */
3385static void
3386testFoldToSelf(UChar32 c, int which) {
3387    UChar s[2];
3388    int32_t length;
3389
3390    length=0;
3391    U16_APPEND_UNSAFE(s, length, c);
3392    testFold(c, which, c, c, s, length, s, length);
3393}
3394
3395struct CaseFoldingData {
3396    USet *notSeen;
3397    UChar32 prev, prevSimple;
3398    UChar prevFull[32];
3399    int32_t prevFullLength;
3400    int which;
3401};
3402typedef struct CaseFoldingData CaseFoldingData;
3403
3404static void U_CALLCONV
3405caseFoldingLineFn(void *context,
3406                  char *fields[][2], int32_t fieldCount,
3407                  UErrorCode *pErrorCode) {
3408    CaseFoldingData *pData=(CaseFoldingData *)context;
3409    char *end;
3410    UChar full[32];
3411    UChar32 c, prev, simple;
3412    int32_t count;
3413    int which;
3414    char status;
3415
3416    /* get code point */
3417    const char *s=u_skipWhitespace(fields[0][0]);
3418    if(0==strncmp(s, "0000..10FFFF", 12)) {
3419        /*
3420         * Ignore the line
3421         * # @missing: 0000..10FFFF; C; <code point>
3422         * because maps-to-self is already our default, and this line breaks this parser.
3423         */
3424        return;
3425    }
3426    c=(UChar32)strtoul(s, &end, 16);
3427    end=(char *)u_skipWhitespace(end);
3428    if(end<=fields[0][0] || end!=fields[0][1]) {
3429        log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3430        *pErrorCode=U_PARSE_ERROR;
3431        return;
3432    }
3433
3434    /* get the status of this mapping */
3435    status=*u_skipWhitespace(fields[1][0]);
3436    if(status!='C' && status!='S' && status!='F' && status!='T') {
3437        log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3438        *pErrorCode=U_PARSE_ERROR;
3439        return;
3440    }
3441
3442    /* get the mapping */
3443    count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3444    if(U_FAILURE(*pErrorCode)) {
3445        log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3446        return;
3447    }
3448
3449    /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3450    if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3451        simple=c;
3452    }
3453
3454    if(c!=(prev=pData->prev)) {
3455        /*
3456         * Test remaining mappings for the previous code point.
3457         * If a turkic folding was not mentioned, then it should fold the same
3458         * as the regular simple case folding.
3459         */
3460        UChar prevString[2];
3461        int32_t length;
3462
3463        length=0;
3464        U16_APPEND_UNSAFE(prevString, length, prev);
3465        testFold(prev, (~pData->which)&CF_ALL,
3466                 prev, pData->prevSimple,
3467                 prevString, length,
3468                 pData->prevFull, pData->prevFullLength);
3469        pData->prev=pData->prevSimple=c;
3470        length=0;
3471        U16_APPEND_UNSAFE(pData->prevFull, length, c);
3472        pData->prevFullLength=length;
3473        pData->which=0;
3474    }
3475
3476    /*
3477     * Turn the status into a bit set of case foldings to test.
3478     * Remember non-Turkic case foldings as defaults for Turkic mode.
3479     */
3480    switch(status) {
3481    case 'C':
3482        which=CF_SIMPLE|CF_FULL;
3483        pData->prevSimple=simple;
3484        u_memcpy(pData->prevFull, full, count);
3485        pData->prevFullLength=count;
3486        break;
3487    case 'S':
3488        which=CF_SIMPLE;
3489        pData->prevSimple=simple;
3490        break;
3491    case 'F':
3492        which=CF_FULL;
3493        u_memcpy(pData->prevFull, full, count);
3494        pData->prevFullLength=count;
3495        break;
3496    case 'T':
3497        which=CF_TURKIC;
3498        break;
3499    default:
3500        which=0;
3501        break; /* won't happen because of test above */
3502    }
3503
3504    testFold(c, which, simple, simple, full, count, full, count);
3505
3506    /* remember which case foldings of c have been tested */
3507    pData->which|=which;
3508
3509    /* remove c from the set of ones not mentioned in CaseFolding.txt */
3510    uset_remove(pData->notSeen, c);
3511}
3512
3513static void
3514TestCaseFolding() {
3515    CaseFoldingData data={ NULL };
3516    char *fields[3][2];
3517    UErrorCode errorCode;
3518
3519    static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3520
3521    errorCode=U_ZERO_ERROR;
3522    /* test BMP & plane 1 - nothing interesting above */
3523    data.notSeen=uset_open(0, 0x1ffff);
3524    data.prevFullLength=1; /* length of full case folding of U+0000 */
3525
3526    parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3527    if(U_SUCCESS(errorCode)) {
3528        int32_t i, start, end;
3529
3530        /* add a pseudo-last line to finish testing of the actual last one */
3531        fields[0][0]=lastLine;
3532        fields[0][1]=lastLine+6;
3533        fields[1][0]=lastLine+7;
3534        fields[1][1]=lastLine+9;
3535        fields[2][0]=lastLine+10;
3536        fields[2][1]=lastLine+17;
3537        caseFoldingLineFn(&data, fields, 3, &errorCode);
3538
3539        /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3540        for(i=0;
3541            0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3542                U_SUCCESS(errorCode);
3543            ++i
3544        ) {
3545            do {
3546                testFoldToSelf(start, CF_ALL);
3547            } while(++start<=end);
3548        }
3549    }
3550
3551    uset_close(data.notSeen);
3552}
3553