1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CUCDTST.C
9*
10* Modification History:
11*        Name                     Description
12*     Madhu Katragadda            Ported for C API, added tests for string functions
13********************************************************************************
14*/
15
16#include <string.h>
17#include <math.h>
18#include <stdlib.h>
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/putil.h"
23#include "unicode/ustring.h"
24#include "unicode/uloc.h"
25#include "unicode/unorm2.h"
26
27#include "cintltst.h"
28#include "putilimp.h"
29#include "uparse.h"
30#include "ucase.h"
31#include "ubidi_props.h"
32#include "uprops.h"
33#include "uset_imp.h"
34#include "usc_impl.h"
35#include "udatamem.h" /* for testing ucase_openBinary() */
36#include "cucdapi.h"
37
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the macro behaves like a
 * single primary expression wherever it is used (e.g. after sizeof or
 * next to a postfix operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
39
40/* prototypes --------------------------------------------------------------- */
41
42static void TestUpperLower(void);
43static void TestLetterNumber(void);
44static void TestMisc(void);
45static void TestPOSIX(void);
46static void TestControlPrint(void);
47static void TestIdentifier(void);
48static void TestUnicodeData(void);
49static void TestCodeUnit(void);
50static void TestCodePoint(void);
51static void TestCharLength(void);
52static void TestCharNames(void);
53static void TestMirroring(void);
54static void TestUScriptRunAPI(void);
55static void TestAdditionalProperties(void);
56static void TestNumericProperties(void);
57static void TestPropertyNames(void);
58static void TestPropertyValues(void);
59static void TestConsistency(void);
60static void TestUCase(void);
61static void TestUBiDiProps(void);
62static void TestCaseFolding(void);
63
64/* internal methods used */
65static int32_t MakeProp(char* str);
66static int32_t MakeDir(char* str);
67
68/* helpers ------------------------------------------------------------------ */
69
70static void
71parseUCDFile(const char *filename,
72             char *fields[][2], int32_t fieldCount,
73             UParseLineFn *lineFn, void *context,
74             UErrorCode *pErrorCode) {
75    char path[256];
76    char backupPath[256];
77
78    if(U_FAILURE(*pErrorCode)) {
79        return;
80    }
81
82    /* Look inside ICU_DATA first */
83    strcpy(path, u_getDataDirectory());
84    strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
85    strcat(path, filename);
86
87    /* As a fallback, try to guess where the source data was located
88     *    at the time ICU was built, and look there.
89     */
90    strcpy(backupPath, ctest_dataSrcDir());
91    strcat(backupPath, U_FILE_SEP_STRING);
92    strcat(backupPath, "unidata" U_FILE_SEP_STRING);
93    strcat(backupPath, filename);
94
95    u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
96    if(*pErrorCode==U_FILE_ACCESS_ERROR) {
97        *pErrorCode=U_ZERO_ERROR;
98        u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
99    }
100    if(U_FAILURE(*pErrorCode)) {
101        log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
102    }
103}
104
105/* test data ---------------------------------------------------------------- */
106
107static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
108static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
109static const int32_t tagValues[] =
110    {
111    /* Mn */ U_NON_SPACING_MARK,
112    /* Mc */ U_COMBINING_SPACING_MARK,
113    /* Me */ U_ENCLOSING_MARK,
114    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
115    /* Nl */ U_LETTER_NUMBER,
116    /* No */ U_OTHER_NUMBER,
117    /* Zs */ U_SPACE_SEPARATOR,
118    /* Zl */ U_LINE_SEPARATOR,
119    /* Zp */ U_PARAGRAPH_SEPARATOR,
120    /* Cc */ U_CONTROL_CHAR,
121    /* Cf */ U_FORMAT_CHAR,
122    /* Cs */ U_SURROGATE,
123    /* Co */ U_PRIVATE_USE_CHAR,
124    /* Cn */ U_UNASSIGNED,
125    /* Lu */ U_UPPERCASE_LETTER,
126    /* Ll */ U_LOWERCASE_LETTER,
127    /* Lt */ U_TITLECASE_LETTER,
128    /* Lm */ U_MODIFIER_LETTER,
129    /* Lo */ U_OTHER_LETTER,
130    /* Pc */ U_CONNECTOR_PUNCTUATION,
131    /* Pd */ U_DASH_PUNCTUATION,
132    /* Ps */ U_START_PUNCTUATION,
133    /* Pe */ U_END_PUNCTUATION,
134    /* Po */ U_OTHER_PUNCTUATION,
135    /* Sm */ U_MATH_SYMBOL,
136    /* Sc */ U_CURRENCY_SYMBOL,
137    /* Sk */ U_MODIFIER_SYMBOL,
138    /* So */ U_OTHER_SYMBOL,
139    /* Pi */ U_INITIAL_PUNCTUATION,
140    /* Pf */ U_FINAL_PUNCTUATION
141    };
142
/*
 * Short names of the bidirectional classes, each stored in a 5-byte slot.
 * NOTE(review): the array index presumably equals the matching
 * UCharDirection value - confirm against MakeDir().
 */
static const char dirStrings[][5] = {
    "L",   "R",   "EN",  "ES",  "ET",
    "AN",  "CS",  "B",   "S",   "WS",
    "ON",  "LRE", "LRO", "AL",  "RLE",
    "RLO", "PDF", "NSM", "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI", "LRI", "RLI", "PDI"
};
169
170void addUnicodeTest(TestNode** root);
171
172void addUnicodeTest(TestNode** root)
173{
174    addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
175    addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
176    addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
177    addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
178    addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
179    addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
180    addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
181    addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
182    addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
183    addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
184    addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
185    addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
186    addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
187    addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
188    addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
189    addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
190    addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
191    addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
192    addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
193    addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
194    addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
195    addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
196    addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
197    addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
198    addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
199    addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
200}
201
202/*==================================================== */
203/* test u_toupper() and u_tolower()                    */
204/*==================================================== */
205static void TestUpperLower()
206{
207    const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
208    const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
209    U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
210    U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
211    int32_t i;
212
213    U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
214    U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
215
216/*
217Checks LetterLike Symbols which were previously a source of confusion
218[Bertrand A. D. 02/04/98]
219*/
220    for (i=0x2100;i<0x2138;i++)
221    {
222        /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
223        if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
224        {
225            if (i != (int)u_tolower(i)) /* itself */
226                log_err("Failed case conversion with itself: U+%04x\n", i);
227            if (i != (int)u_toupper(i))
228                log_err("Failed case conversion with itself: U+%04x\n", i);
229        }
230    }
231
232    for(i=0; i < u_strlen(upper); i++){
233        if(u_tolower(upper[i]) != lower[i]){
234            log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
235        }
236    }
237
238    log_verbose("testing upper lower\n");
239    for (i = 0; i < 21; i++) {
240
241        if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
242        {
243            log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
244        }
245        else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
246         {
247            log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
248        }
249        else if (upperTest[i] != u_tolower(lowerTest[i]))
250        {
251            log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
252        }
253        else if (lowerTest[i] != u_toupper(upperTest[i]))
254         {
255            log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
256        }
257        else if (upperTest[i] != u_tolower(upperTest[i]))
258        {
259            log_err("Failed case conversion with itself: %c\n", upperTest[i]);
260        }
261        else if (lowerTest[i] != u_toupper(lowerTest[i]))
262        {
263            log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
264        }
265    }
266    log_verbose("done testing upper lower\n");
267
268    log_verbose("testing u_istitle\n");
269    {
270        static const UChar expected[] = {
271            0x1F88,
272            0x1F89,
273            0x1F8A,
274            0x1F8B,
275            0x1F8C,
276            0x1F8D,
277            0x1F8E,
278            0x1F8F,
279            0x1F88,
280            0x1F89,
281            0x1F8A,
282            0x1F8B,
283            0x1F8C,
284            0x1F8D,
285            0x1F8E,
286            0x1F8F,
287            0x1F98,
288            0x1F99,
289            0x1F9A,
290            0x1F9B,
291            0x1F9C,
292            0x1F9D,
293            0x1F9E,
294            0x1F9F,
295            0x1F98,
296            0x1F99,
297            0x1F9A,
298            0x1F9B,
299            0x1F9C,
300            0x1F9D,
301            0x1F9E,
302            0x1F9F,
303            0x1FA8,
304            0x1FA9,
305            0x1FAA,
306            0x1FAB,
307            0x1FAC,
308            0x1FAD,
309            0x1FAE,
310            0x1FAF,
311            0x1FA8,
312            0x1FA9,
313            0x1FAA,
314            0x1FAB,
315            0x1FAC,
316            0x1FAD,
317            0x1FAE,
318            0x1FAF,
319            0x1FBC,
320            0x1FBC,
321            0x1FCC,
322            0x1FCC,
323            0x1FFC,
324            0x1FFC,
325        };
326        int32_t num = sizeof(expected)/sizeof(expected[0]);
327        for(i=0; i<num; i++){
328            if(!u_istitle(expected[i])){
329                log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
330            }
331        }
332
333    }
334}
335
336/* compare two sets and verify that their difference or intersection is empty */
337static UBool
338showADiffB(const USet *a, const USet *b,
339           const char *a_name, const char *b_name,
340           UBool expect, UBool diffIsError) {
341    USet *aa;
342    int32_t i, start, end, length;
343    UErrorCode errorCode;
344
345    /*
346     * expect:
347     * TRUE  -> a-b should be empty, that is, b should contain all of a
348     * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
349     */
350    if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
351        return TRUE;
352    }
353
354    /* clone a to aa because a is const */
355    aa=uset_open(1, 0);
356    if(aa==NULL) {
357        /* unusual problem - out of memory? */
358        return FALSE;
359    }
360    uset_addAll(aa, a);
361
362    /* compute the set in question */
363    if(expect) {
364        /* a-b */
365        uset_removeAll(aa, b);
366    } else {
367        /* a&b */
368        uset_retainAll(aa, b);
369    }
370
371    /* aa is not empty because of the initial tests above; show its contents */
372    errorCode=U_ZERO_ERROR;
373    i=0;
374    for(;;) {
375        length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
376        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
377            break; /* done */
378        }
379        if(U_FAILURE(errorCode)) {
380            log_err("error comparing %s with %s at difference item %d: %s\n",
381                a_name, b_name, i, u_errorName(errorCode));
382            break;
383        }
384        if(length!=0) {
385            break; /* done with code points, got a string or -1 */
386        }
387
388        if(diffIsError) {
389            if(expect) {
390                log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
391            } else {
392                log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
393            }
394        } else {
395            if(expect) {
396                log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
397            } else {
398                log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
399            }
400        }
401
402        ++i;
403    }
404
405    uset_close(aa);
406    return FALSE;
407}
408
409static UBool
410showAMinusB(const USet *a, const USet *b,
411            const char *a_name, const char *b_name,
412            UBool diffIsError) {
413    return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
414}
415
416static UBool
417showAIntersectB(const USet *a, const USet *b,
418                const char *a_name, const char *b_name,
419                UBool diffIsError) {
420    return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
421}
422
423static UBool
424compareUSets(const USet *a, const USet *b,
425             const char *a_name, const char *b_name,
426             UBool diffIsError) {
427    /*
428     * Use an arithmetic & not a logical && so that both branches
429     * are always taken and all differences are shown.
430     */
431    return
432        showAMinusB(a, b, a_name, b_name, diffIsError) &
433        showAMinusB(b, a, b_name, a_name, diffIsError);
434}
435
436/* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
437static void TestLetterNumber()
438{
439    UChar i = 0x0000;
440
441    log_verbose("Testing for isalpha\n");
442    for (i = 0x0041; i < 0x005B; i++) {
443        if (!u_isalpha(i))
444        {
445            log_err("Failed isLetter test at  %.4X\n", i);
446        }
447    }
448    for (i = 0x0660; i < 0x066A; i++) {
449        if (u_isalpha(i))
450        {
451            log_err("Failed isLetter test with numbers at %.4X\n", i);
452        }
453    }
454
455    log_verbose("Testing for isdigit\n");
456    for (i = 0x0660; i < 0x066A; i++) {
457        if (!u_isdigit(i))
458        {
459            log_verbose("Failed isNumber test at %.4X\n", i);
460        }
461    }
462
463    log_verbose("Testing for isalnum\n");
464    for (i = 0x0041; i < 0x005B; i++) {
465        if (!u_isalnum(i))
466        {
467            log_err("Failed isAlNum test at  %.4X\n", i);
468        }
469    }
470    for (i = 0x0660; i < 0x066A; i++) {
471        if (!u_isalnum(i))
472        {
473            log_err("Failed isAlNum test at  %.4X\n", i);
474        }
475    }
476
477    {
478        /*
479         * The following checks work only starting from Unicode 4.0.
480         * Check the version number here.
481         */
482        static UVersionInfo u401={ 4, 0, 1, 0 };
483        UVersionInfo version;
484        u_getUnicodeVersion(version);
485        if(version[0]<4 || 0==memcmp(version, u401, 4)) {
486            return;
487        }
488    }
489
490    {
491        /*
492         * Sanity check:
493         * Verify that exactly the digit characters have decimal digit values.
494         * This assumption is used in the implementation of u_digit()
495         * (which checks nt=de)
496         * compared with the parallel java.lang.Character.digit()
497         * (which checks Nd).
498         *
499         * This was not true in Unicode 3.2 and earlier.
500         * Unicode 4.0 fixed discrepancies.
501         * Unicode 4.0.1 re-introduced problems in this area due to an
502         * unintentionally incomplete last-minute change.
503         */
504        U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
505        U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
506
507        USet *digits, *decimalValues;
508        UErrorCode errorCode;
509
510        U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
511        U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
512        errorCode=U_ZERO_ERROR;
513        digits=uset_openPattern(digitsPattern, 6, &errorCode);
514        decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
515
516        if(U_SUCCESS(errorCode)) {
517            compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
518        }
519
520        uset_close(digits);
521        uset_close(decimalValues);
522    }
523}
524
525static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
526                                const UChar32 *sampleChars, int32_t sampleCharsLength,
527                                UBool expected) {
528    int32_t i;
529    for (i = 0; i < sampleCharsLength; ++i) {
530        UBool result = propFn(sampleChars[i]);
531        if (result != expected) {
532            log_err("error: character property function %s(U+%04x)=%d is wrong\n",
533                    propName, sampleChars[i], result);
534        }
535    }
536}
537
538/* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
539static void TestMisc()
540{
541    static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
542    static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
543    static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
544    static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
545    static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
546    static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
547/*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
548    static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
549    static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
550    static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
551    static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
552
553    static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
554
555    uint32_t mask;
556
557    int32_t i;
558    char icuVersion[U_MAX_VERSION_STRING_LENGTH];
559    UVersionInfo realVersion;
560
561    memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
562
563    testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
564    testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
565
566    testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
567                        sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
568    testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
569                        sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
570
571    testSampleCharProps(u_isWhitespace, "u_isWhitespace",
572                        sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
573    testSampleCharProps(u_isWhitespace, "u_isWhitespace",
574                        sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
575
576    testSampleCharProps(u_isdefined, "u_isdefined",
577                        sampleDefined, LENGTHOF(sampleDefined), TRUE);
578    testSampleCharProps(u_isdefined, "u_isdefined",
579                        sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
580
581    testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
582    testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
583
584    testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
585    testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
586
587    for (i = 0; i < LENGTHOF(sampleDigits); i++) {
588        if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
589            log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
590                    sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
591        }
592    }
593
594    /* Tests the ICU version #*/
595    u_getVersion(realVersion);
596    u_versionToString(realVersion, icuVersion);
597    if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
598    {
599        log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
600    }
601#if defined(ICU_VERSION)
602    /* test only happens where we have configure.in with VERSION - sanity check. */
603    if(strcmp(U_ICU_VERSION, ICU_VERSION))
604    {
605        log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
606    }
607#endif
608
609    /* test U_GC_... */
610    if(
611        U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
612        U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
613        U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
614        U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
615        U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
616        U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
617    ) {
618        log_err("error: U_GET_GC_MASK does not work properly\n");
619    }
620
621    mask=0;
622    mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
623
624    mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
625    mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
626    mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
627    mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
628    mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
629
630    mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
631    mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
632    mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
633
634    mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
635    mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
636    mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
637
638    mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
639    mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
640    mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
641
642    mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
643    mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
644    mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
645    mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
646
647    mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
648    mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
649    mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
650    mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
651    mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
652
653    mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
654    mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
655    mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
656    mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
657
658    mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
659    mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
660
661    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
662        log_err("error: problems with U_GC_XX_MASK constants\n");
663    }
664
665    mask=0;
666    mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
667    mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
668    mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
669    mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
670    mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
671    mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
672    mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
673
674    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
675        log_err("error: problems with U_GC_Y_MASK constants\n");
676    }
677    {
678        static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
679        for(i=0; i<10; i++){
680            if(digit[i]!=u_forDigit(i,10)){
681                log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
682            }
683        }
684    }
685
686    /* test u_digit() */
687    {
688        static const struct {
689            UChar32 c;
690            int8_t radix, value;
691        } data[]={
692            /* base 16 */
693            { 0x0031, 16, 1 },
694            { 0x0038, 16, 8 },
695            { 0x0043, 16, 12 },
696            { 0x0066, 16, 15 },
697            { 0x00e4, 16, -1 },
698            { 0x0662, 16, 2 },
699            { 0x06f5, 16, 5 },
700            { 0xff13, 16, 3 },
701            { 0xff41, 16, 10 },
702
703            /* base 8 */
704            { 0x0031, 8, 1 },
705            { 0x0038, 8, -1 },
706            { 0x0043, 8, -1 },
707            { 0x0066, 8, -1 },
708            { 0x00e4, 8, -1 },
709            { 0x0662, 8, 2 },
710            { 0x06f5, 8, 5 },
711            { 0xff13, 8, 3 },
712            { 0xff41, 8, -1 },
713
714            /* base 36 */
715            { 0x5a, 36, 35 },
716            { 0x7a, 36, 35 },
717            { 0xff3a, 36, 35 },
718            { 0xff5a, 36, 35 },
719
720            /* wrong radix values */
721            { 0x0031, 1, -1 },
722            { 0xff3a, 37, -1 }
723        };
724
725        for(i=0; i<LENGTHOF(data); ++i) {
726            if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
727                log_err("u_digit(U+%04x, %d)=%d expected %d\n",
728                        data[i].c,
729                        data[i].radix,
730                        u_digit(data[i].c, data[i].radix),
731                        data[i].value);
732            }
733        }
734    }
735}
736
737/* test C/POSIX-style functions --------------------------------------------- */
738
739/* bit flags */
740#define ISAL     1
741#define ISLO     2
742#define ISUP     4
743
744#define ISDI     8
745#define ISXD  0x10
746
747#define ISAN  0x20
748
749#define ISPU  0x40
750#define ISGR  0x80
751#define ISPR 0x100
752
753#define ISSP 0x200
754#define ISBL 0x400
755#define ISCN 0x800
756
757/* C/POSIX-style functions, in the same order as the bit flags */
758typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
759
760static const struct {
761    IsPOSIXClass *fn;
762    const char *name;
763} posixClasses[]={
764    { u_isalpha, "isalpha" },
765    { u_islower, "islower" },
766    { u_isupper, "isupper" },
767    { u_isdigit, "isdigit" },
768    { u_isxdigit, "isxdigit" },
769    { u_isalnum, "isalnum" },
770    { u_ispunct, "ispunct" },
771    { u_isgraph, "isgraph" },
772    { u_isprint, "isprint" },
773    { u_isspace, "isspace" },
774    { u_isblank, "isblank" },
775    { u_iscntrl, "iscntrl" }
776};
777
778static const struct {
779    UChar32 c;
780    uint32_t posixResults;
781} posixData[]={
782    { 0x0008,                                                        ISCN },    /* backspace */
783    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
784    { 0x000a,                                              ISSP|     ISCN },    /* LF */
785    { 0x000c,                                              ISSP|     ISCN },    /* FF */
786    { 0x000d,                                              ISSP|     ISCN },    /* CR */
787    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
788    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
789    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
790    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
791    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
792    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
793    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
794    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
795    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
796    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
797    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
798    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
799    { 0x0600,                                                        ISCN },    /* arabic number sign */
800    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
801    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
802    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
803    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
804    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
805    { 0x200b,                                                        ISCN },    /* ZWSP */
806  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
807    { 0x200e,                                                        ISCN },    /* LRM */
808    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
809    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
810    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
811    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
812    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
813    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
814    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
815    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
816};
817
818static void
819TestPOSIX() {
820    uint32_t mask;
821    int32_t cl, i;
822    UBool expect;
823
824    mask=1;
825    for(cl=0; cl<12; ++cl) {
826        for(i=0; i<LENGTHOF(posixData); ++i) {
827            expect=(UBool)((posixData[i].posixResults&mask)!=0);
828            if(posixClasses[cl].fn(posixData[i].c)!=expect) {
829                log_err("u_%s(U+%04x)=%s is wrong\n",
830                    posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
831            }
832        }
833        mask<<=1;
834    }
835}
836
837/* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
838static void TestControlPrint()
839{
840    const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
841    const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
842    const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
843    const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
844    UChar32 c;
845
846    testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
847    testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
848
849    testSampleCharProps(u_isprint, "u_isprint",
850                        samplePrintable, LENGTHOF(samplePrintable), TRUE);
851    testSampleCharProps(u_isprint, "u_isprint",
852                        sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
853
854    /* test all ISO 8 controls */
855    for(c=0; c<=0x9f; ++c) {
856        if(c==0x20) {
857            /* skip ASCII graphic characters and continue with DEL */
858            c=0x7f;
859        }
860        if(!u_iscntrl(c)) {
861            log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
862        }
863        if(!u_isISOControl(c)) {
864            log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
865        }
866        if(u_isprint(c)) {
867            log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
868        }
869    }
870
871    /* test all Latin-1 graphic characters */
872    for(c=0x20; c<=0xff; ++c) {
873        if(c==0x7f) {
874            c=0xa0;
875        } else if(c==0xad) {
876            /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
877            ++c;
878        }
879        if(!u_isprint(c)) {
880            log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
881        }
882    }
883}
884
885/* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
886static void TestIdentifier()
887{
888    const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
889    const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
890    const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
891    const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
892    const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
893    const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
894    const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
895    const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
896    const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
897    const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
898
899    testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
900                        sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
901    testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
902                        sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
903
904    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
905                        sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
906    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
907                        sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
908
909    /* IDPart should imply IDStart */
910    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
911                        sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
912
913    testSampleCharProps(u_isIDStart, "u_isIDStart",
914                        sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
915    testSampleCharProps(u_isIDStart, "u_isIDStart",
916                        sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
917
918    testSampleCharProps(u_isIDPart, "u_isIDPart",
919                        sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
920    testSampleCharProps(u_isIDPart, "u_isIDPart",
921                        sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
922
923    /* IDPart should imply IDStart */
924    testSampleCharProps(u_isIDPart, "u_isIDPart",
925                        sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
926
927    testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
928                        sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
929    testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
930                        sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
931}
932
933/* for each line of UnicodeData.txt, check some of the properties */
934typedef struct UnicodeDataContext {
935#if UCONFIG_NO_NORMALIZATION
936    const void *dummy;
937#else
938    const UNormalizer2 *nfc;
939    const UNormalizer2 *nfkc;
940#endif
941} UnicodeDataContext;
942
943/*
944 * ### TODO
945 * This test fails incorrectly if the First or Last code point of a repetitive area
946 * is overridden, which is allowed and is encouraged for the PUAs.
947 * Currently, this means that both area First/Last and override lines are
948 * tested against the properties from the API,
949 * and the area boundary will not match and cause an error.
950 *
951 * This function should detect area boundaries and skip them for the test of individual
952 * code points' properties.
953 * Then it should check that the areas contain all the same properties except where overridden.
954 * For this, it would have had to set a flag for which code points were listed explicitly.
955 */
956static void U_CALLCONV
957unicodeDataLineFn(void *context,
958                  char *fields[][2], int32_t fieldCount,
959                  UErrorCode *pErrorCode)
960{
961    char buffer[100];
962    const char *d;
963    char *end;
964    uint32_t value;
965    UChar32 c;
966    int32_t i;
967    int8_t type;
968    int32_t dt;
969    UChar dm[32], s[32];
970    int32_t dmLength, length;
971
972#if !UCONFIG_NO_NORMALIZATION
973    const UNormalizer2 *nfc, *nfkc;
974#endif
975
976    /* get the character code, field 0 */
977    c=strtoul(fields[0][0], &end, 16);
978    if(end<=fields[0][0] || end!=fields[0][1]) {
979        log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
980        return;
981    }
982    if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
983        log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
984        return;
985    }
986
987    /* get general category, field 2 */
988    *fields[2][1]=0;
989    type = (int8_t)tagValues[MakeProp(fields[2][0])];
990    if(u_charType(c)!=type) {
991        log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
992    }
993    if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
994        log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
995    }
996
997    /* get canonical combining class, field 3 */
998    value=strtoul(fields[3][0], &end, 10);
999    if(end<=fields[3][0] || end!=fields[3][1]) {
1000        log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1001        return;
1002    }
1003    if(value>255) {
1004        log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1005        return;
1006    }
1007#if !UCONFIG_NO_NORMALIZATION
1008    if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1009        log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1010    }
1011    nfkc=((UnicodeDataContext *)context)->nfkc;
1012    if(value!=unorm2_getCombiningClass(nfkc, c)) {
1013        log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1014    }
1015#endif
1016
1017    /* get BiDi category, field 4 */
1018    *fields[4][1]=0;
1019    i=MakeDir(fields[4][0]);
1020    if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1021        log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1022    }
1023
1024    /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1025    d=NULL;
1026    if(fields[5][0]==fields[5][1]) {
1027        /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1028        if(c==0xac00 || c==0xd7a3) {
1029            dt=U_DT_CANONICAL;
1030        } else {
1031            dt=U_DT_NONE;
1032        }
1033    } else {
1034        d=fields[5][0];
1035        *fields[5][1]=0;
1036        dt=UCHAR_INVALID_CODE;
1037        if(*d=='<') {
1038            end=strchr(++d, '>');
1039            if(end!=NULL) {
1040                *end=0;
1041                dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1042                d=u_skipWhitespace(end+1);
1043            }
1044        } else {
1045            dt=U_DT_CANONICAL;
1046        }
1047    }
1048    if(dt>U_DT_NONE) {
1049        if(c==0xac00) {
1050            dm[0]=0x1100;
1051            dm[1]=0x1161;
1052            dm[2]=0;
1053            dmLength=2;
1054        } else if(c==0xd7a3) {
1055            dm[0]=0xd788;
1056            dm[1]=0x11c2;
1057            dm[2]=0;
1058            dmLength=2;
1059        } else {
1060            dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1061        }
1062    } else {
1063        dmLength=-1;
1064    }
1065    if(dt<0 || U_FAILURE(*pErrorCode)) {
1066        log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1067        return;
1068    }
1069#if !UCONFIG_NO_NORMALIZATION
1070    i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1071    if(i!=dt) {
1072        log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1073    }
1074    /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1075    length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1076    if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1077        log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1078                "or the Decomposition_Mapping is different (%s)\n",
1079                c, length, dmLength, u_errorName(*pErrorCode));
1080        return;
1081    }
1082    /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1083    if(dt!=U_DT_CANONICAL) {
1084        dmLength=-1;
1085    }
1086    nfc=((UnicodeDataContext *)context)->nfc;
1087    length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1088    if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1089        log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1090                "or the Decomposition_Mapping is different (%s)\n",
1091                c, length, dmLength, u_errorName(*pErrorCode));
1092        return;
1093    }
1094    /* recompose */
1095    if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1096        UChar32 a, b, composite;
1097        i=0;
1098        U16_NEXT(dm, i, dmLength, a);
1099        U16_NEXT(dm, i, dmLength, b);
1100        /* i==dmLength */
1101        composite=unorm2_composePair(nfc, a, b);
1102        if(composite!=c) {
1103            log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1104                    (long)c, (long)a, (long)b, (long)composite);
1105        }
1106        /*
1107         * Note: NFKC has fewer round-trip mappings than NFC,
1108         * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1109         */
1110    }
1111#endif
1112
1113    /* get ISO Comment, field 11 */
1114    *fields[11][1]=0;
1115    i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1116    if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1117        log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1118            c, u_errorName(*pErrorCode),
1119            U_FAILURE(*pErrorCode) ? buffer : "[error]",
1120            fields[11][0]);
1121    }
1122
1123    /* get uppercase mapping, field 12 */
1124    if(fields[12][0]!=fields[12][1]) {
1125        value=strtoul(fields[12][0], &end, 16);
1126        if(end!=fields[12][1]) {
1127            log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1128            return;
1129        }
1130        if((UChar32)value!=u_toupper(c)) {
1131            log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1132        }
1133    } else {
1134        /* no case mapping: the API must map the code point to itself */
1135        if(c!=u_toupper(c)) {
1136            log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1137        }
1138    }
1139
1140    /* get lowercase mapping, field 13 */
1141    if(fields[13][0]!=fields[13][1]) {
1142        value=strtoul(fields[13][0], &end, 16);
1143        if(end!=fields[13][1]) {
1144            log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1145            return;
1146        }
1147        if((UChar32)value!=u_tolower(c)) {
1148            log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1149        }
1150    } else {
1151        /* no case mapping: the API must map the code point to itself */
1152        if(c!=u_tolower(c)) {
1153            log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1154        }
1155    }
1156
1157    /* get titlecase mapping, field 14 */
1158    if(fields[14][0]!=fields[14][1]) {
1159        value=strtoul(fields[14][0], &end, 16);
1160        if(end!=fields[14][1]) {
1161            log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1162            return;
1163        }
1164        if((UChar32)value!=u_totitle(c)) {
1165            log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1166        }
1167    } else {
1168        /* no case mapping: the API must map the code point to itself */
1169        if(c!=u_totitle(c)) {
1170            log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1171        }
1172    }
1173}
1174
1175static UBool U_CALLCONV
1176enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1177    static const UChar32 test[][2]={
1178        {0x41, U_UPPERCASE_LETTER},
1179        {0x308, U_NON_SPACING_MARK},
1180        {0xfffe, U_GENERAL_OTHER_TYPES},
1181        {0xe0041, U_FORMAT_CHAR},
1182        {0xeffff, U_UNASSIGNED}
1183    };
1184
1185    int32_t i, count;
1186
1187    if(0!=strcmp((const char *)context, "a1")) {
1188        log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1189        return FALSE;
1190    }
1191
1192    count=LENGTHOF(test);
1193    for(i=0; i<count; ++i) {
1194        if(start<=test[i][0] && test[i][0]<limit) {
1195            if(type!=(UCharCategory)test[i][1]) {
1196                log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1197                        start, limit, (long)type, test[i][0], test[i][1]);
1198            }
1199            /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1200            return i==(count-1) ? FALSE : TRUE;
1201        }
1202    }
1203
1204    if(start>test[count-1][0]) {
1205        log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1206                start, limit, (long)type);
1207        return FALSE;
1208    }
1209
1210    return TRUE;
1211}
1212
1213static UBool U_CALLCONV
1214enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1215    /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1216    static const int32_t defaultBidi[][2]={ /* { limit, class } */
1217        { 0x0590, U_LEFT_TO_RIGHT },
1218        { 0x0600, U_RIGHT_TO_LEFT },
1219        { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1220        { 0x08A0, U_RIGHT_TO_LEFT },
1221        { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1222        { 0x20A0, U_LEFT_TO_RIGHT },
1223        { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1224        { 0xFB1D, U_LEFT_TO_RIGHT },
1225        { 0xFB50, U_RIGHT_TO_LEFT },
1226        { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1227        { 0xFE70, U_LEFT_TO_RIGHT },
1228        { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1229        { 0x10800, U_LEFT_TO_RIGHT },
1230        { 0x11000, U_RIGHT_TO_LEFT },
1231        { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1232        { 0x1EE00, U_RIGHT_TO_LEFT },
1233        { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1234        { 0x1F000, U_RIGHT_TO_LEFT },
1235        { 0x110000, U_LEFT_TO_RIGHT }
1236    };
1237
1238    UChar32 c;
1239    int32_t i;
1240    UCharDirection shouldBeDir;
1241
1242    /*
1243     * LineBreak.txt specifies:
1244     *   #  - Assigned characters that are not listed explicitly are given the value
1245     *   #    "AL".
1246     *   #  - Unassigned characters are given the value "XX".
1247     *
1248     * PUA characters are listed explicitly with "XX".
1249     * Verify that no assigned character has "XX".
1250     */
1251    if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1252        c=start;
1253        while(c<limit) {
1254            if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1255                log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1256            }
1257            ++c;
1258        }
1259    }
1260
1261    /*
1262     * Verify default Bidi classes.
1263     * For recent Unicode versions, see UCD.html.
1264     *
1265     * For older Unicode versions:
1266     * See table 3-7 "Bidirectional Character Types" in UAX #9.
1267     * http://www.unicode.org/reports/tr9/
1268     *
1269     * See also DerivedBidiClass.txt for Cn code points!
1270     *
1271     * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1272     * changed some default values.
1273     * In particular, non-characters and unassigned Default Ignorable Code Points
1274     * change from L to BN.
1275     *
1276     * UCD.html version 4.0.1 does not yet reflect these changes.
1277     */
1278    if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1279        /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1280        c=start;
1281        for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1282            if((int32_t)c<defaultBidi[i][0]) {
1283                while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1284                    if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1285                        shouldBeDir=U_BOUNDARY_NEUTRAL;
1286                    } else {
1287                        shouldBeDir=(UCharDirection)defaultBidi[i][1];
1288                    }
1289
1290                    if( u_charDirection(c)!=shouldBeDir ||
1291                        u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1292                    ) {
1293                        log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1294                            c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1295                    }
1296                    ++c;
1297                }
1298            }
1299        }
1300    }
1301
1302    return TRUE;
1303}
1304
1305/* tests for several properties */
1306static void TestUnicodeData()
1307{
1308    UVersionInfo expectVersionArray;
1309    UVersionInfo versionArray;
1310    char *fields[15][2];
1311    UErrorCode errorCode;
1312    UChar32 c;
1313    int8_t type;
1314
1315    UnicodeDataContext context;
1316
1317    u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1318    u_getUnicodeVersion(versionArray);
1319    if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1320    {
1321        log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1322        versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1323    }
1324
1325#if defined(ICU_UNICODE_VERSION)
1326    /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1327    if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1328    {
1329         log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1330    }
1331#endif
1332
1333    if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1334        log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1335    }
1336
1337    errorCode=U_ZERO_ERROR;
1338#if !UCONFIG_NO_NORMALIZATION
1339    context.nfc=unorm2_getNFCInstance(&errorCode);
1340    context.nfkc=unorm2_getNFKCInstance(&errorCode);
1341    if(U_FAILURE(errorCode)) {
1342        log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1343        return;
1344    }
1345#endif
1346    parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1347    if(U_FAILURE(errorCode)) {
1348        return; /* if we couldn't parse UnicodeData.txt, we should return */
1349    }
1350
1351    /* sanity check on repeated properties */
1352    for(c=0xfffe; c<=0x10ffff;) {
1353        type=u_charType(c);
1354        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1355            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1356        }
1357        if(type!=U_UNASSIGNED) {
1358            log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1359        }
1360        if((c&0xffff)==0xfffe) {
1361            ++c;
1362        } else {
1363            c+=0xffff;
1364        }
1365    }
1366
1367    /* test that PUA is not "unassigned" */
1368    for(c=0xe000; c<=0x10fffd;) {
1369        type=u_charType(c);
1370        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1371            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1372        }
1373        if(type==U_UNASSIGNED) {
1374            log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1375        } else if(type!=U_PRIVATE_USE_CHAR) {
1376            log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1377        }
1378        if(c==0xf8ff) {
1379            c=0xf0000;
1380        } else if(c==0xffffd) {
1381            c=0x100000;
1382        } else {
1383            ++c;
1384        }
1385    }
1386
1387    /* test u_enumCharTypes() */
1388    u_enumCharTypes(enumTypeRange, "a1");
1389
1390    /* check default properties */
1391    u_enumCharTypes(enumDefaultsRange, NULL);
1392}
1393
1394static void TestCodeUnit(){
1395    const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1396
1397    int32_t i;
1398
1399    for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1400        UChar c=codeunit[i];
1401        if(i<4){
1402            if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1403                log_err("ERROR: U+%04x is a single", c);
1404            }
1405
1406        }
1407        if(i >= 4 && i< 8){
1408            if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1409                log_err("ERROR: U+%04x is a first surrogate", c);
1410            }
1411        }
1412        if(i >= 8 && i< 12){
1413            if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1414                log_err("ERROR: U+%04x is a second surrogate", c);
1415            }
1416        }
1417    }
1418
1419}
1420
1421static void TestCodePoint(){
1422    const UChar32 codePoint[]={
1423        /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1424        0xd800,
1425        0xdbff,
1426        0xdc00,
1427        0xdfff,
1428        0xdc04,
1429        0xd821,
1430        /*not a surrogate, valid, isUnicodeChar , not Error*/
1431        0x20ac,
1432        0xd7ff,
1433        0xe000,
1434        0xe123,
1435        0x0061,
1436        0xe065,
1437        0x20402,
1438        0x24506,
1439        0x23456,
1440        0x20402,
1441        0x10402,
1442        0x23456,
1443        /*not a surrogate, not valid, isUnicodeChar, isError */
1444        0x0015,
1445        0x009f,
1446        /*not a surrogate, not valid, not isUnicodeChar, isError */
1447        0xffff,
1448        0xfffe,
1449    };
1450    int32_t i;
1451    for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1452        UChar32 c=codePoint[i];
1453        if(i<6){
1454            if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1455                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1456            }
1457            if(UTF_IS_VALID(c)){
1458                log_err("ERROR: isValid() failed for U+%04x\n", c);
1459            }
1460            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1461                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1462            }
1463            if(UTF_IS_ERROR(c)){
1464                log_err("ERROR: isError() failed for U+%04x\n", c);
1465            }
1466        }else if(i >=6 && i<18){
1467            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1468                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1469            }
1470            if(!UTF_IS_VALID(c)){
1471                log_err("ERROR: isValid() failed for U+%04x\n", c);
1472            }
1473            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1474                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1475            }
1476            if(UTF_IS_ERROR(c)){
1477                log_err("ERROR: isError() failed for U+%04x\n", c);
1478            }
1479        }else if(i >=18 && i<20){
1480            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1481                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1482            }
1483            if(UTF_IS_VALID(c)){
1484                log_err("ERROR: isValid() failed for U+%04x\n", c);
1485            }
1486            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1487                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1488            }
1489            if(!UTF_IS_ERROR(c)){
1490                log_err("ERROR: isError() failed for U+%04x\n", c);
1491            }
1492        }
1493        else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1494            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1495                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1496            }
1497            if(UTF_IS_VALID(c)){
1498                log_err("ERROR: isValid() failed for U+%04x\n", c);
1499            }
1500            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1501                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1502            }
1503            if(!UTF_IS_ERROR(c)){
1504                log_err("ERROR: isError() failed for U+%04x\n", c);
1505            }
1506        }
1507    }
1508
1509    if(
1510        !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1511        !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1512        U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1513        U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1514    ) {
1515        log_err("error with U_IS_BMP()\n");
1516    }
1517
1518    if(
1519        U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1520        U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1521        U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1522        !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1523    ) {
1524        log_err("error with U_IS_SUPPLEMENTARY()\n");
1525    }
1526}
1527
1528static void TestCharLength()
1529{
1530    const int32_t codepoint[]={
1531        1, 0x0061,
1532        1, 0xe065,
1533        1, 0x20ac,
1534        2, 0x20402,
1535        2, 0x23456,
1536        2, 0x24506,
1537        2, 0x20402,
1538        2, 0x10402,
1539        1, 0xd7ff,
1540        1, 0xe000
1541    };
1542
1543    int32_t i;
1544    UBool multiple;
1545    for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1546        UChar32 c=codepoint[i+1];
1547        if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1548            log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1549        }
1550        multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1551        if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1552            log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1553        }
1554    }
1555}
1556
1557/*internal functions ----*/
1558static int32_t MakeProp(char* str)
1559{
1560    int32_t result = 0;
1561    char* matchPosition =0;
1562
1563    matchPosition = strstr(tagStrings, str);
1564    if (matchPosition == 0)
1565    {
1566        log_err("unrecognized type letter ");
1567        log_err(str);
1568    }
1569    else
1570        result = (int32_t)((matchPosition - tagStrings) / 2);
1571    return result;
1572}
1573
1574static int32_t MakeDir(char* str)
1575{
1576    int32_t pos = 0;
1577    for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
1578        if (strcmp(str, dirStrings[pos]) == 0) {
1579            return pos;
1580        }
1581    }
1582    return -1;
1583}
1584
1585/* test u_charName() -------------------------------------------------------- */
1586
/*
 * Expected character names for a sample of code points, one row per code point.
 * An empty string or a NULL alias means that no name of that kind is expected.
 */
static const struct {
    uint32_t code;          /* the code point */
    const char *name;       /* compared for U_UNICODE_CHAR_NAME */
    const char *oldName;    /* compared for U_UNICODE_10_CHAR_NAME */
    const char *extName;    /* compared for U_EXTENDED_CHAR_NAME */
    const char *alias;      /* compared for U_CHAR_NAME_ALIAS */
} names[]={
    {0x0061,
        "LATIN SMALL LETTER A", "",
        "LATIN SMALL LETTER A",
        NULL},
    {0x01a2,
        "LATIN CAPITAL LETTER OI", "",
        "LATIN CAPITAL LETTER OI",
        "LATIN CAPITAL LETTER GHA"},
    {0x0284,
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
        NULL},
    {0x0fd0,
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
        "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
    {0x3401,
        "CJK UNIFIED IDEOGRAPH-3401", "",
        "CJK UNIFIED IDEOGRAPH-3401",
        NULL},
    {0x7fed,
        "CJK UNIFIED IDEOGRAPH-7FED", "",
        "CJK UNIFIED IDEOGRAPH-7FED",
        NULL},
    {0xac00,
        "HANGUL SYLLABLE GA", "",
        "HANGUL SYLLABLE GA",
        NULL},
    {0xd7a3,
        "HANGUL SYLLABLE HIH", "",
        "HANGUL SYLLABLE HIH",
        NULL},
    {0xd800,
        "", "",
        "<lead surrogate-D800>",
        NULL},
    {0xdc00,
        "", "",
        "<trail surrogate-DC00>",
        NULL},
    {0xff08,
        "FULLWIDTH LEFT PARENTHESIS", "",
        "FULLWIDTH LEFT PARENTHESIS",
        NULL},
    {0xffe5,
        "FULLWIDTH YEN SIGN", "",
        "FULLWIDTH YEN SIGN",
        NULL},
    {0xffff,
        "", "",
        "<noncharacter-FFFF>",
        NULL},
    {0x1d0c5,
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
        "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
    {0x23456,
        "CJK UNIFIED IDEOGRAPH-23456", "",
        "CJK UNIFIED IDEOGRAPH-23456",
        NULL}
};
1614
1615static UBool
1616enumCharNamesFn(void *context,
1617                UChar32 code, UCharNameChoice nameChoice,
1618                const char *name, int32_t length) {
1619    int32_t *pCount=(int32_t *)context;
1620    const char *expected;
1621    int i;
1622
1623    if(length<=0 || length!=(int32_t)strlen(name)) {
1624        /* should not be called with an empty string or invalid length */
1625        log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1626        return TRUE;
1627    }
1628
1629    ++*pCount;
1630    for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1631        if(code==(UChar32)names[i].code) {
1632            switch (nameChoice) {
1633                case U_EXTENDED_CHAR_NAME:
1634                    if(0!=strcmp(name, names[i].extName)) {
1635                        log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1636                    }
1637                    break;
1638                case U_UNICODE_CHAR_NAME:
1639                    if(0!=strcmp(name, names[i].name)) {
1640                        log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1641                    }
1642                    break;
1643                case U_UNICODE_10_CHAR_NAME:
1644                    expected=names[i].oldName;
1645                    if(expected[0]==0 || 0!=strcmp(name, expected)) {
1646                        log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1647                    }
1648                    break;
1649                case U_CHAR_NAME_ALIAS:
1650                    expected=names[i].alias;
1651                    if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1652                        log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1653                    }
1654                    break;
1655                case U_CHAR_NAME_CHOICE_COUNT:
1656                    break;
1657            }
1658            break;
1659        }
1660    }
1661    return TRUE;
1662}
1663
1664struct enumExtCharNamesContext {
1665    uint32_t length;
1666    int32_t last;
1667};
1668
1669static UBool
1670enumExtCharNamesFn(void *context,
1671                UChar32 code, UCharNameChoice nameChoice,
1672                const char *name, int32_t length) {
1673    struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1674
1675    if (ecncp->last != (int32_t) code - 1) {
1676        if (ecncp->last < 0) {
1677            log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1678        } else {
1679            log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1680        }
1681    }
1682    ecncp->last = (int32_t) code;
1683
1684    if (!*name) {
1685        log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1686    }
1687
1688    return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1689}
1690
1691/**
1692 * This can be made more efficient by moving it into putil.c and having
1693 * it directly access the ebcdic translation tables.
1694 * TODO: If we get this method in putil.c, then delete it from here.
1695 */
1696static UChar
1697u_charToUChar(char c) {
1698    UChar uc;
1699    u_charsToUChars(&c, &uc, 1);
1700    return uc;
1701}
1702
1703static void
1704TestCharNames() {
1705    static char name[80];
1706    UErrorCode errorCode=U_ZERO_ERROR;
1707    struct enumExtCharNamesContext extContext;
1708    const char *expected;
1709    int32_t length;
1710    UChar32 c;
1711    int32_t i;
1712
1713    log_verbose("Testing uprv_getMaxCharNameLength()\n");
1714    length=uprv_getMaxCharNameLength();
1715    if(length==0) {
1716        /* no names data available */
1717        return;
1718    }
1719    if(length<83) { /* Unicode 3.2 max char name length */
1720        log_err("uprv_getMaxCharNameLength()=%d is too short");
1721    }
1722    /* ### TODO same tests for max ISO comment length as for max name length */
1723
1724    log_verbose("Testing u_charName()\n");
1725    for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1726        /* modern Unicode character name */
1727        length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1728        if(U_FAILURE(errorCode)) {
1729            log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1730            return;
1731        }
1732        if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1733            log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1734        }
1735
1736        /* find the modern name */
1737        if (*names[i].name) {
1738            c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1739            if(U_FAILURE(errorCode)) {
1740                log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1741                return;
1742            }
1743            if(c!=(UChar32)names[i].code) {
1744                log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1745            }
1746        }
1747
1748        /* Unicode 1.0 character name */
1749        length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1750        if(U_FAILURE(errorCode)) {
1751            log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1752            return;
1753        }
1754        if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1755            log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1756        }
1757
1758        /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1759        if(names[i].oldName[0]!=0 /* && length>0 */) {
1760            c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1761            if(U_FAILURE(errorCode)) {
1762                log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1763                return;
1764            }
1765            if(c!=(UChar32)names[i].code) {
1766                log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1767            }
1768        }
1769
1770        /* Unicode character name alias */
1771        length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1772        if(U_FAILURE(errorCode)) {
1773            log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1774            return;
1775        }
1776        expected=names[i].alias;
1777        if(expected==NULL) {
1778            expected="";
1779        }
1780        if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1781            log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1782                    names[i].code, name, length, expected);
1783        }
1784
1785        /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1786        if(expected[0]!=0 /* && length>0 */) {
1787            c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1788            if(U_FAILURE(errorCode)) {
1789                log_err("u_charFromName(%s - alias) error %s\n",
1790                        expected, u_errorName(errorCode));
1791                return;
1792            }
1793            if(c!=(UChar32)names[i].code) {
1794                log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1795                        expected, c, names[i].code);
1796            }
1797        }
1798    }
1799
1800    /* test u_enumCharNames() */
1801    length=0;
1802    errorCode=U_ZERO_ERROR;
1803    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1804    if(U_FAILURE(errorCode) || length<94140) {
1805        log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1806    }
1807
1808    extContext.length = 0;
1809    extContext.last = -1;
1810    errorCode=U_ZERO_ERROR;
1811    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1812    if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1813        log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1814    }
1815
1816    /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1817    if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1818        log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1819    }
1820
1821    /* Test getCharNameCharacters */
1822    if(!getTestOption(QUICK_OPTION)) {
1823        enum { BUFSIZE = 256 };
1824        UErrorCode ec = U_ZERO_ERROR;
1825        char buf[BUFSIZE];
1826        int32_t maxLength;
1827        UChar32 cp;
1828        UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1829        int32_t l1, l2;
1830        UBool map[256];
1831        UBool ok;
1832
1833        USet* set = uset_open(1, 0); /* empty set */
1834        USet* dumb = uset_open(1, 0); /* empty set */
1835
1836        /*
1837         * uprv_getCharNameCharacters() will likely return more lowercase
1838         * letters than actual character names contain because
1839         * it includes all the characters in lowercased names of
1840         * general categories, for the full possible set of extended names.
1841         */
1842        {
1843            USetAdder sa={
1844                NULL,
1845                uset_add,
1846                uset_addRange,
1847                uset_addString,
1848                NULL /* don't need remove() */
1849            };
1850            sa.set=set;
1851            uprv_getCharNameCharacters(&sa);
1852        }
1853
1854        /* build set the dumb (but sure-fire) way */
1855        for (i=0; i<256; ++i) {
1856            map[i] = FALSE;
1857        }
1858
1859        maxLength=0;
1860        for (cp=0; cp<0x110000; ++cp) {
1861            int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1862                                     buf, BUFSIZE, &ec);
1863            if (U_FAILURE(ec)) {
1864                log_err("FAIL: u_charName failed when it shouldn't\n");
1865                uset_close(set);
1866                uset_close(dumb);
1867                return;
1868            }
1869            if(len>maxLength) {
1870                maxLength=len;
1871            }
1872
1873            for (i=0; i<len; ++i) {
1874                if (!map[(uint8_t) buf[i]]) {
1875                    uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1876                    map[(uint8_t) buf[i]] = TRUE;
1877                }
1878            }
1879
1880            /* test for leading/trailing whitespace */
1881            if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1882                log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1883            }
1884        }
1885
1886        if(map[(uint8_t)'\t']) {
1887            log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1888        }
1889
1890        length=uprv_getMaxCharNameLength();
1891        if(length!=maxLength) {
1892            log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1893                    length, maxLength);
1894        }
1895
1896        /* compare the sets.  Where is my uset_equals?!! */
1897        ok=TRUE;
1898        for(i=0; i<256; ++i) {
1899            if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1900                if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1901                    /* ignore lowercase a-z that are in set but not in dumb */
1902                    ok=TRUE;
1903                } else {
1904                    ok=FALSE;
1905                    break;
1906                }
1907            }
1908        }
1909
1910        l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1911        l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1912        if (U_FAILURE(ec)) {
1913            log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1914            uset_close(set);
1915            uset_close(dumb);
1916            return;
1917        }
1918
1919        if (l1 >= BUFSIZE) {
1920            l1 = BUFSIZE-1;
1921            pat[l1] = 0;
1922        }
1923        if (l2 >= BUFSIZE) {
1924            l2 = BUFSIZE-1;
1925            dumbPat[l2] = 0;
1926        }
1927
1928        if (!ok) {
1929            log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1930                    aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1931        } else if(getTestOption(VERBOSITY_OPTION)) {
1932            log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1933        }
1934
1935        uset_close(set);
1936        uset_close(dumb);
1937    }
1938
1939    /* ### TODO: test error cases and other interesting things */
1940}
1941
1942/* test u_isMirrored() and u_charMirror() ----------------------------------- */
1943
1944static void
1945TestMirroring() {
1946    USet *set;
1947    UErrorCode errorCode;
1948
1949    UChar32 start, end, c2, c3;
1950    int32_t i;
1951
1952    U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1953
1954    U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1955
1956    log_verbose("Testing u_isMirrored()\n");
1957    if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1958         !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1959        )
1960    ) {
1961        log_err("u_isMirrored() does not work correctly\n");
1962    }
1963
1964    log_verbose("Testing u_charMirror()\n");
1965    if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1966         u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1967         u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1968         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1969         u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1970         )
1971    ) {
1972        log_err("u_charMirror() does not work correctly\n");
1973    }
1974
1975    /* verify that Bidi_Mirroring_Glyph roundtrips */
1976    errorCode=U_ZERO_ERROR;
1977    set=uset_openPattern(mirroredPattern, 17, &errorCode);
1978
1979    if (U_FAILURE(errorCode)) {
1980        log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
1981    } else {
1982        for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1983            do {
1984                c2=u_charMirror(start);
1985                c3=u_charMirror(c2);
1986                if(c3!=start) {
1987                    log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1988                }
1989                c3=u_getBidiPairedBracket(start);
1990                if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
1991                    if(c3!=start) {
1992                        log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
1993                                (long)start);
1994                    }
1995                } else {
1996                    if(c3!=c2) {
1997                        log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
1998                                (long)start, (long)c2);
1999                    }
2000                }
2001            } while(++start<=end);
2002        }
2003    }
2004
2005    uset_close(set);
2006}
2007
2008
2009struct RunTestData
2010{
2011    const char *runText;
2012    UScriptCode runCode;
2013};
2014
2015typedef struct RunTestData RunTestData;
2016
2017static void
2018CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2019                const char *prefix)
2020{
2021    int32_t run, runStart, runLimit;
2022    UScriptCode runCode;
2023
2024    /* iterate over all the runs */
2025    run = 0;
2026    while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2027        if (runStart != runStarts[run]) {
2028            log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2029                prefix, run, runStarts[run], runStart);
2030        }
2031
2032        if (runLimit != runStarts[run + 1]) {
2033            log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2034                prefix, run, runStarts[run + 1], runLimit);
2035        }
2036
2037        if (runCode != testData[run].runCode) {
2038            log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2039                prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2040        }
2041
2042        run += 1;
2043
2044        /* stop when we've seen all the runs we expect to see */
2045        if (run >= nRuns) {
2046            break;
2047        }
2048    }
2049
2050    /* Complain if we didn't see then number of runs we expected */
2051    if (run != nRuns) {
2052        log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2053    }
2054}
2055
2056static void
2057TestUScriptRunAPI()
2058{
2059    static const RunTestData testData1[] = {
2060        {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2061        {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2062        {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2063        {"English (", USCRIPT_LATIN},
2064        {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2065        {") ", USCRIPT_LATIN},
2066        {"\\u6F22\\u5B75", USCRIPT_HAN},
2067        {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2068        {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2069        {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2070    };
2071
2072    static const RunTestData testData2[] = {
2073       {"((((((((((abc))))))))))", USCRIPT_LATIN}
2074    };
2075
2076    static const struct {
2077      const RunTestData *testData;
2078      int32_t nRuns;
2079    } testDataEntries[] = {
2080        {testData1, LENGTHOF(testData1)},
2081        {testData2, LENGTHOF(testData2)}
2082    };
2083
2084    static const int32_t nTestEntries = LENGTHOF(testDataEntries);
2085    int32_t testEntry;
2086
2087    for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2088        UChar testString[1024];
2089        int32_t runStarts[256];
2090        int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2091        const RunTestData *testData = testDataEntries[testEntry].testData;
2092
2093        int32_t run, stringLimit;
2094        UScriptRun *scriptRun = NULL;
2095        UErrorCode err;
2096
2097        /*
2098         * Fill in the test string and the runStarts array.
2099         */
2100        stringLimit = 0;
2101        for (run = 0; run < nTestRuns; run += 1) {
2102            runStarts[run] = stringLimit;
2103            stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2104            /*stringLimit -= 1;*/
2105        }
2106
2107        /* The limit of the last run */
2108        runStarts[nTestRuns] = stringLimit;
2109
2110        /*
2111         * Make sure that calling uscript_OpenRun with a NULL text pointer
2112         * and a non-zero text length returns the correct error.
2113         */
2114        err = U_ZERO_ERROR;
2115        scriptRun = uscript_openRun(NULL, stringLimit, &err);
2116
2117        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2118            log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2119        }
2120
2121        if (scriptRun != NULL) {
2122            log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2123            uscript_closeRun(scriptRun);
2124        }
2125
2126        /*
2127         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2128         * and a zero text length returns the correct error.
2129         */
2130        err = U_ZERO_ERROR;
2131        scriptRun = uscript_openRun(testString, 0, &err);
2132
2133        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2134            log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2135        }
2136
2137        if (scriptRun != NULL) {
2138            log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2139            uscript_closeRun(scriptRun);
2140        }
2141
2142        /*
2143         * Make sure that calling uscript_openRun with a NULL text pointer
2144         * and a zero text length doesn't return an error.
2145         */
2146        err = U_ZERO_ERROR;
2147        scriptRun = uscript_openRun(NULL, 0, &err);
2148
2149        if (U_FAILURE(err)) {
2150            log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2151        }
2152
2153        /* Make sure that the empty iterator doesn't find any runs */
2154        if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2155            log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2156        }
2157
2158        /*
2159         * Make sure that calling uscript_setRunText with a NULL text pointer
2160         * and a non-zero text length returns the correct error.
2161         */
2162        err = U_ZERO_ERROR;
2163        uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2164
2165        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2166            log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2167        }
2168
2169        /*
2170         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2171         * and a zero text length returns the correct error.
2172         */
2173        err = U_ZERO_ERROR;
2174        uscript_setRunText(scriptRun, testString, 0, &err);
2175
2176        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2177            log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2178        }
2179
2180        /*
2181         * Now call uscript_setRunText on the empty iterator
2182         * and make sure that it works.
2183         */
2184        err = U_ZERO_ERROR;
2185        uscript_setRunText(scriptRun, testString, stringLimit, &err);
2186
2187        if (U_FAILURE(err)) {
2188            log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2189        } else {
2190            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2191        }
2192
2193        uscript_closeRun(scriptRun);
2194
2195        /*
2196         * Now open an interator over the testString
2197         * using uscript_openRun and make sure that it works
2198         */
2199        scriptRun = uscript_openRun(testString, stringLimit, &err);
2200
2201        if (U_FAILURE(err)) {
2202            log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2203        } else {
2204            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2205        }
2206
2207        /* Now reset the iterator, and make sure
2208         * that it still works.
2209         */
2210        uscript_resetRun(scriptRun);
2211
2212        CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2213
2214        /* Close the iterator */
2215        uscript_closeRun(scriptRun);
2216    }
2217}
2218
2219/* test additional, non-core properties */
2220static void
2221TestAdditionalProperties() {
2222    /* test data for u_charAge() */
2223    static const struct {
2224        UChar32 c;
2225        UVersionInfo version;
2226    } charAges[]={
2227        {0x41,    { 1, 1, 0, 0 }},
2228        {0xffff,  { 1, 1, 0, 0 }},
2229        {0x20ab,  { 2, 0, 0, 0 }},
2230        {0x2fffe, { 2, 0, 0, 0 }},
2231        {0x20ac,  { 2, 1, 0, 0 }},
2232        {0xfb1d,  { 3, 0, 0, 0 }},
2233        {0x3f4,   { 3, 1, 0, 0 }},
2234        {0x10300, { 3, 1, 0, 0 }},
2235        {0x220,   { 3, 2, 0, 0 }},
2236        {0xff60,  { 3, 2, 0, 0 }}
2237    };
2238
2239    /* test data for u_hasBinaryProperty() */
2240    static const int32_t
2241    props[][3]={ /* code point, property, value */
2242        { 0x0627, UCHAR_ALPHABETIC, TRUE },
2243        { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2244        { 0x2028, UCHAR_ALPHABETIC, FALSE },
2245
2246        { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2247        { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2248
2249        { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2250        { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2251
2252        { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2253        { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2254
2255        /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2256        { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2257        { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2258        { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2259        { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2260
2261        { 0x058a, UCHAR_DASH, TRUE },
2262        { 0x007e, UCHAR_DASH, FALSE },
2263
2264        { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2265        { 0x3000, UCHAR_DIACRITIC, FALSE },
2266
2267        { 0x0e46, UCHAR_EXTENDER, TRUE },
2268        { 0x0020, UCHAR_EXTENDER, FALSE },
2269
2270#if !UCONFIG_NO_NORMALIZATION
2271        { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2272        { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2273        { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2274
2275        { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2276        { 0x0308, UCHAR_NFD_INERT, FALSE },
2277
2278        { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2279        { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2280
2281        { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2282        { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2283        { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2284        { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2285        { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2286        { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2287
2288        { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2289        { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2290
2291        { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2292        { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2293        { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2294        { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2295        { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2296        { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2297#endif
2298
2299        { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2300        { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2301        { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2302
2303        { 0x30fb, UCHAR_HYPHEN, TRUE },
2304        { 0xfe58, UCHAR_HYPHEN, FALSE },
2305
2306        { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2307        { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2308        { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2309
2310        { 0x2172, UCHAR_ID_START, TRUE },
2311        { 0x007a, UCHAR_ID_START, TRUE },
2312        { 0x0039, UCHAR_ID_START, FALSE },
2313
2314        { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2315        { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2316        { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2317
2318        { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2319        { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2320
2321        { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2322        { 0x0345, UCHAR_LOWERCASE, TRUE },
2323        { 0x0030, UCHAR_LOWERCASE, FALSE },
2324
2325        { 0x1d7a9, UCHAR_MATH, TRUE },
2326        { 0x2135, UCHAR_MATH, TRUE },
2327        { 0x0062, UCHAR_MATH, FALSE },
2328
2329        { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2330        { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2331        { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2332
2333        { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2334        { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2335        { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2336
2337        { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2338        { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2339
2340        { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2341        { 0x2162, UCHAR_UPPERCASE, TRUE },
2342        { 0x0345, UCHAR_UPPERCASE, FALSE },
2343
2344        { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2345        { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2346        { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2347
2348        { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2349        { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2350        { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2351
2352        { 0x16ee, UCHAR_XID_START, TRUE },
2353        { 0x23456, UCHAR_XID_START, TRUE },
2354        { 0x1d1aa, UCHAR_XID_START, FALSE },
2355
2356        /*
2357         * Version break:
2358         * The following properties are only supported starting with the
2359         * Unicode version indicated in the second field.
2360         */
2361        { -1, 0x320, 0 },
2362
2363        { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2364        { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2365        { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2366
2367        { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2368        { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2369        { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2370        { 0xe0100, UCHAR_DEPRECATED, FALSE },
2371
2372        { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2373        { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2374        { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2375        { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2376
2377        { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2378        { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2379        { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2380        { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2381
2382        { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2383        { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2384
2385        { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2386        { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2387
2388        { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2389        { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2390
2391        { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2392        { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2393
2394        { 0x2e9b, UCHAR_RADICAL, TRUE },
2395        { 0x4e00, UCHAR_RADICAL, FALSE },
2396
2397        { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2398        { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2399
2400        { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2401        { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2402
2403        { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2404
2405        { 0x002e, UCHAR_S_TERM, TRUE },
2406        { 0x0061, UCHAR_S_TERM, FALSE },
2407
2408        { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2409        { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2410        { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2411        { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2412
2413        /* enum/integer type properties */
2414
2415        /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2416        /* test default Bidi classes for unassigned code points */
2417        { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2418        { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2419        { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2420        { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2421        { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2422        { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2423        { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2424        { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2425        { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2426        { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2427        { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2428
2429        { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2430        { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2431        { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2432        { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2433        { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2434        { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2435        { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2436        { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2437
2438        { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2439        { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2440        { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2441        { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2442        { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2443        { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2444        { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2445        { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2446        { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2447        { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2448        { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2449
2450        /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2451        { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2452
2453        { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2454        { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2455        { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2456        { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2457        { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2458        { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2459        { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2460        { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2461        { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2462
2463        { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2464        { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2465        { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2466        { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2467        { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2468        { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2469        { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2470        { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2471        { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2472        { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2473        { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2474        { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2475        { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2476        { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2477        { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2478        { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2479        { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2480
2481        /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2482        { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2483        { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2484
2485        { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2486        { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2487        { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2488        { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2489        { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2490
2491        { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2492        { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2493        { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2494        { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2495        { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2496        { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2497        { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2498        { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2499
2500        /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2501        { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2502        { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2503        { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2504        { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2505        { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2506        { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2507        { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2508        { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2509        { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2510        { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2511        { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2512        { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2513        { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2514        { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2515        { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2516
2517        /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2518
2519        /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2520
2521        { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2522        { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2523        { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2524        { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2525        { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2526        { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2527        { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2528
2529        { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2530        { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2531        { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2532        { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2533
2534        { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2535        { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2536        { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2537        { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2538        { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2539        { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2540
2541        { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2542        { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2543        { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2544        { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2545
2546        { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2547        { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2548        { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2549        { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2550        { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2551        { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2552        { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2553
2554        { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2555        { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2556        { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2557        { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2558
2559        { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2560        { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2561        { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2562        { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2563
2564        { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2565        { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2566        { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2567        { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2568        { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2569
2570        { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2571
2572        { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2573
2574        { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2575        { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2576        { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2577
2578        { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2579        { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2580        { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2581        { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2582        { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2583
2584        { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2585        { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2586        { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2587
2588        { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2589        { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2590        { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2591        { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2592
2593        { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2594        { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2595        { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2596        { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2597        { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2598        { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2599
2600        { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2601        { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2602        { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2603        { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2604
2605        { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2606        { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2607        { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2608        { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2609
2610        { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2611        { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2612        { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2613        { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2614
2615        { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2616
2617        /* unassigned code points in new default Bidi R blocks */
2618        { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2619        { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2620
2621        /* test some script codes >127 */
2622        { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2623        { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2624        { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2625
2626        { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2627
2628        /* value changed in Unicode 6.0 */
2629        { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2630
2631        { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2632
2633        /* unassigned code points in new/changed default Bidi AL blocks */
2634        { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2635        { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2636
2637        { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2638
2639        /* unassigned code points in the currency symbols block now default to ET */
2640        { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2641        { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2642
2643        /* new property in Unicode 6.3 */
2644        { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2645        { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2646        { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2647        { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2648        { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2649        { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2650
2651        /* undefined UProperty values */
2652        { 0x61, 0x4a7, 0 },
2653        { 0x234bc, 0x15ed, 0 }
2654    };
2655
2656    UVersionInfo version;
2657    UChar32 c;
2658    int32_t i, result, uVersion;
2659    UProperty which;
2660
2661    /* what is our Unicode version? */
2662    u_getUnicodeVersion(version);
2663    uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2664
2665    u_charAge(0x20, version);
2666    if(version[0]==0) {
2667        /* no additional properties available */
2668        log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2669        return;
2670    }
2671
2672    /* test u_charAge() */
2673    for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2674        u_charAge(charAges[i].c, version);
2675        if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2676            log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2677                charAges[i].c,
2678                version[0], version[1], version[2], version[3],
2679                charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2680        }
2681    }
2682
2683    if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2684        u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2685        u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2686        u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2687        u_getIntPropertyMinValue(0x2345)!=0
2688    ) {
2689        log_err("error: u_getIntPropertyMinValue() wrong\n");
2690    }
2691    if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2692        log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2693    }
2694    if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2695        log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2696    }
2697    if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2698        log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2699    }
2700    if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2701        log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2702    }
2703    if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2704        log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2705    }
2706    if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2707        log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2708    }
2709    if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2710        log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2711    }
2712    if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2713        log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2714    }
2715    if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2716        log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2717    }
2718    if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2719        log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2720    }
2721    if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2722        log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2723    }
2724    if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2725        log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2726    }
2727    if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2728        log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2729    }
2730    if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2731        log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2732    }
2733    /*JB#2410*/
2734    if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2735        log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2736    }
2737    if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2738        log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2739    }
2740    if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2741        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2742    }
2743    if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2744        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2745    }
2746    if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2747        log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2748    }
2749
2750    /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2751    for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2752        const char *whichName;
2753
2754        if(props[i][0]<0) {
2755            /* Unicode version break */
2756            if(uVersion<props[i][1]) {
2757                break; /* do not test properties that are not yet supported */
2758            } else {
2759                continue; /* skip this row */
2760            }
2761        }
2762
2763        c=(UChar32)props[i][0];
2764        which=(UProperty)props[i][1];
2765        whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2766
2767        if(which<UCHAR_INT_START) {
2768            result=u_hasBinaryProperty(c, which);
2769            if(result!=props[i][2]) {
2770                log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2771                        c, whichName, result, i);
2772            }
2773        }
2774
2775        result=u_getIntPropertyValue(c, which);
2776        if(result!=props[i][2]) {
2777            log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2778                    c, whichName, result, props[i][2], i);
2779        }
2780
2781        /* test separate functions, too */
2782        switch((UProperty)props[i][1]) {
2783        case UCHAR_ALPHABETIC:
2784            if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2785                log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2786                        props[i][0], result, i);
2787            }
2788            break;
2789        case UCHAR_LOWERCASE:
2790            if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2791                log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2792                        props[i][0], result, i);
2793            }
2794            break;
2795        case UCHAR_UPPERCASE:
2796            if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2797                log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2798                        props[i][0], result, i);
2799            }
2800            break;
2801        case UCHAR_WHITE_SPACE:
2802            if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2803                log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2804                        props[i][0], result, i);
2805            }
2806            break;
2807        default:
2808            break;
2809        }
2810    }
2811}
2812
2813static void
2814TestNumericProperties(void) {
2815    /* see UnicodeData.txt, DerivedNumericValues.txt */
2816    static const struct {
2817        UChar32 c;
2818        int32_t type;
2819        double numValue;
2820    } values[]={
2821        { 0x0F33, U_NT_NUMERIC, -1./2. },
2822        { 0x0C66, U_NT_DECIMAL, 0 },
2823        { 0x96f6, U_NT_NUMERIC, 0 },
2824        { 0xa833, U_NT_NUMERIC, 1./16. },
2825        { 0x2152, U_NT_NUMERIC, 1./10. },
2826        { 0x2151, U_NT_NUMERIC, 1./9. },
2827        { 0x1245f, U_NT_NUMERIC, 1./8. },
2828        { 0x2150, U_NT_NUMERIC, 1./7. },
2829        { 0x2159, U_NT_NUMERIC, 1./6. },
2830        { 0x09f6, U_NT_NUMERIC, 3./16. },
2831        { 0x2155, U_NT_NUMERIC, 1./5. },
2832        { 0x00BD, U_NT_NUMERIC, 1./2. },
2833        { 0x0031, U_NT_DECIMAL, 1. },
2834        { 0x4e00, U_NT_NUMERIC, 1. },
2835        { 0x58f1, U_NT_NUMERIC, 1. },
2836        { 0x10320, U_NT_NUMERIC, 1. },
2837        { 0x0F2B, U_NT_NUMERIC, 3./2. },
2838        { 0x00B2, U_NT_DIGIT, 2. },
2839        { 0x5f10, U_NT_NUMERIC, 2. },
2840        { 0x1813, U_NT_DECIMAL, 3. },
2841        { 0x5f0e, U_NT_NUMERIC, 3. },
2842        { 0x2173, U_NT_NUMERIC, 4. },
2843        { 0x8086, U_NT_NUMERIC, 4. },
2844        { 0x278E, U_NT_DIGIT, 5. },
2845        { 0x1D7F2, U_NT_DECIMAL, 6. },
2846        { 0x247A, U_NT_DIGIT, 7. },
2847        { 0x7396, U_NT_NUMERIC, 9. },
2848        { 0x1372, U_NT_NUMERIC, 10. },
2849        { 0x216B, U_NT_NUMERIC, 12. },
2850        { 0x16EE, U_NT_NUMERIC, 17. },
2851        { 0x249A, U_NT_NUMERIC, 19. },
2852        { 0x303A, U_NT_NUMERIC, 30. },
2853        { 0x5345, U_NT_NUMERIC, 30. },
2854        { 0x32B2, U_NT_NUMERIC, 37. },
2855        { 0x1375, U_NT_NUMERIC, 40. },
2856        { 0x10323, U_NT_NUMERIC, 50. },
2857        { 0x0BF1, U_NT_NUMERIC, 100. },
2858        { 0x964c, U_NT_NUMERIC, 100. },
2859        { 0x217E, U_NT_NUMERIC, 500. },
2860        { 0x2180, U_NT_NUMERIC, 1000. },
2861        { 0x4edf, U_NT_NUMERIC, 1000. },
2862        { 0x2181, U_NT_NUMERIC, 5000. },
2863        { 0x137C, U_NT_NUMERIC, 10000. },
2864        { 0x4e07, U_NT_NUMERIC, 10000. },
2865        { 0x12432, U_NT_NUMERIC, 216000. },
2866        { 0x12433, U_NT_NUMERIC, 432000. },
2867        { 0x4ebf, U_NT_NUMERIC, 100000000. },
2868        { 0x5146, U_NT_NUMERIC, 1000000000000. },
2869        { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2870        { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2871        { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2872        { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2873        { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2874        { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2875        { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2876        { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2877    };
2878
2879    double nv;
2880    UChar32 c;
2881    int32_t i, type;
2882
2883    for(i=0; i<LENGTHOF(values); ++i) {
2884        c=values[i].c;
2885        type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2886        nv=u_getNumericValue(c);
2887
2888        if(type!=values[i].type) {
2889            log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2890        }
2891        if(0.000001 <= fabs(nv - values[i].numValue)) {
2892            log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2893        }
2894    }
2895}
2896
2897/**
2898 * Test the property names and property value names API.
2899 */
2900static void
2901TestPropertyNames(void) {
2902    int32_t p, v, choice=0, rev;
2903    UBool atLeastSomething = FALSE;
2904
2905    for (p=0; ; ++p) {
2906        UProperty propEnum = (UProperty)p;
2907        UBool sawProp = FALSE;
2908        if(p > 10 && !atLeastSomething) {
2909          log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2910          return;
2911        }
2912
2913        for (choice=0; ; ++choice) {
2914            const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2915            if (name) {
2916                if (!sawProp)
2917                    log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2918                log_verbose("%d=\"%s\"", choice, name);
2919                sawProp = TRUE;
2920                atLeastSomething = TRUE;
2921
2922                /* test reverse mapping */
2923                rev = u_getPropertyEnum(name);
2924                if (rev != p) {
2925                    log_err("Property round-trip failure: %d -> %s -> %d\n",
2926                            p, name, rev);
2927                }
2928            }
2929            if (!name && choice>0) break;
2930        }
2931        if (sawProp) {
2932            /* looks like a valid property; check the values */
2933            const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2934            int32_t max = 0;
2935            if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2936                max = 255;
2937            } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2938                /* it's far too slow to iterate all the way up to
2939                   the real max, U_GC_P_MASK */
2940                max = U_GC_NL_MASK;
2941            } else if (p == UCHAR_BLOCK) {
2942                /* UBlockCodes, unlike other values, start at 1 */
2943                max = 1;
2944            }
2945            log_verbose("\n");
2946            for (v=-1; ; ++v) {
2947                UBool sawValue = FALSE;
2948                for (choice=0; ; ++choice) {
2949                    const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2950                    if (vname) {
2951                        if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2952                        log_verbose("%d=\"%s\"", choice, vname);
2953                        sawValue = TRUE;
2954
2955                        /* test reverse mapping */
2956                        rev = u_getPropertyValueEnum(propEnum, vname);
2957                        if (rev != v) {
2958                            log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2959                                    pname, v, vname, rev);
2960                        }
2961                    }
2962                    if (!vname && choice>0) break;
2963                }
2964                if (sawValue) {
2965                    log_verbose("\n");
2966                }
2967                if (!sawValue && v>=max) break;
2968            }
2969        }
2970        if (!sawProp) {
2971            if (p>=UCHAR_STRING_LIMIT) {
2972                break;
2973            } else if (p>=UCHAR_DOUBLE_LIMIT) {
2974                p = UCHAR_STRING_START - 1;
2975            } else if (p>=UCHAR_MASK_LIMIT) {
2976                p = UCHAR_DOUBLE_START - 1;
2977            } else if (p>=UCHAR_INT_LIMIT) {
2978                p = UCHAR_MASK_START - 1;
2979            } else if (p>=UCHAR_BINARY_LIMIT) {
2980                p = UCHAR_INT_START - 1;
2981            }
2982        }
2983    }
2984}
2985
2986/**
2987 * Test the property values API.  See JB#2410.
2988 */
2989static void
2990TestPropertyValues(void) {
2991    int32_t i, p, min, max;
2992    UErrorCode ec;
2993
2994    /* Min should be 0 for everything. */
2995    /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2996    for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2997        UProperty propEnum = (UProperty)p;
2998        min = u_getIntPropertyMinValue(propEnum);
2999        if (min != 0) {
3000            if (p == UCHAR_BLOCK) {
3001                /* This is okay...for now.  See JB#2487.
3002                   TODO Update this for JB#2487. */
3003            } else {
3004                const char* name;
3005                name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3006                if (name == NULL)
3007                    name = "<ERROR>";
3008                log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3009                        name, min);
3010            }
3011        }
3012    }
3013
3014    if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3015        u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3016        log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3017    }
3018
3019    /* Max should be -1 for invalid properties. */
3020    max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
3021    if (max != -1) {
3022        log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3023                max);
3024    }
3025
3026    /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
3027    for (i=0; i<2; ++i) {
3028        int32_t script;
3029        const char* desc;
3030        ec = U_ZERO_ERROR;
3031        switch (i) {
3032        case 0:
3033            script = uscript_getScript(-1, &ec);
3034            desc = "uscript_getScript(-1)";
3035            break;
3036        case 1:
3037            script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3038            desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3039            break;
3040        default:
3041            log_err("Internal test error. Too many scripts\n");
3042            return;
3043        }
3044        /* We don't explicitly test ec.  It should be U_FAILURE but it
3045           isn't documented as such. */
3046        if (script != (int32_t)USCRIPT_INVALID_CODE) {
3047            log_err("FAIL: %s = %d, exp. 0\n",
3048                    desc, script);
3049        }
3050    }
3051}
3052
3053/* various tests for consistency of UCD data and API behavior */
3054static void
3055TestConsistency() {
3056    char buffer[300];
3057    USet *set1, *set2, *set3, *set4;
3058    UErrorCode errorCode;
3059
3060    UChar32 start, end;
3061    int32_t i, length;
3062
3063    U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3064    U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3065    U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3066    U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3067    U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3068
3069    U_STRING_DECL(mathBlocksPattern,
3070        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3071        1+32+46+46+45+43+1+1); /* +1 for NUL */
3072    U_STRING_DECL(mathPattern, "[:Math:]", 8);
3073    U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3074    U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3075    U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3076
3077    U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3078    U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3079    U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3080    U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3081    U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3082
3083    U_STRING_INIT(mathBlocksPattern,
3084        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3085        1+32+46+46+45+43+1+1); /* +1 for NUL */
3086    U_STRING_INIT(mathPattern, "[:Math:]", 8);
3087    U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3088    U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3089    U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3090
3091    /*
3092     * It used to be that UCD.html and its precursors said
3093     * "Those dashes used to mark connections between pieces of words,
3094     *  plus the Katakana middle dot."
3095     *
3096     * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3097     * but not from Hyphen.
3098     * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3099     * Therefore, do not show errors when testing the Hyphen property.
3100     */
3101    log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3102                "known to the UTC and not considered errors.\n");
3103
3104    errorCode=U_ZERO_ERROR;
3105    set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3106    set2=uset_openPattern(dashPattern, 8, &errorCode);
3107    if(U_SUCCESS(errorCode)) {
3108        /* remove the Katakana middle dot(s) from set1 */
3109        uset_remove(set1, 0x30fb);
3110        uset_remove(set1, 0xff65); /* halfwidth variant */
3111        showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3112    } else {
3113        log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3114    }
3115
3116    /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3117    set3=uset_openPattern(formatPattern, 6, &errorCode);
3118    set4=uset_openPattern(alphaPattern, 14, &errorCode);
3119    if(U_SUCCESS(errorCode)) {
3120        showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3121        showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3122        showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3123    } else {
3124        log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3125    }
3126
3127    uset_close(set1);
3128    uset_close(set2);
3129    uset_close(set3);
3130    uset_close(set4);
3131
3132    /*
3133     * Check that each lowercase character has "small" in its name
3134     * and not "capital".
3135     * There are some such characters, some of which seem odd.
3136     * Use the verbose flag to see these notices.
3137     */
3138    errorCode=U_ZERO_ERROR;
3139    set1=uset_openPattern(lowerPattern, 13, &errorCode);
3140    if(U_SUCCESS(errorCode)) {
3141        for(i=0;; ++i) {
3142            length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3143            if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3144                break; /* done */
3145            }
3146            if(U_FAILURE(errorCode)) {
3147                log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3148                        i, u_errorName(errorCode));
3149                break;
3150            }
3151            if(length!=0) {
3152                break; /* done with code points, got a string or -1 */
3153            }
3154
3155            while(start<=end) {
3156                length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3157                if(U_FAILURE(errorCode)) {
3158                    log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3159                    errorCode=U_ZERO_ERROR;
3160                }
3161                if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3162                    strstr(buffer, "SMALL CAPITAL")==NULL
3163                ) {
3164                    log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3165                }
3166                ++start;
3167            }
3168        }
3169    } else {
3170        log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3171    }
3172    uset_close(set1);
3173
3174    /* verify that all assigned characters in Math blocks are exactly Math characters */
3175    errorCode=U_ZERO_ERROR;
3176    set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3177    set2=uset_openPattern(mathPattern, 8, &errorCode);
3178    set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3179    if(U_SUCCESS(errorCode)) {
3180        uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3181        uset_complement(set3);      /* assigned characters */
3182        uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3183        compareUSets(set1, set2,
3184                     "[assigned Math block chars]", "[math blocks]&[:Math:]",
3185                     TRUE);
3186    } else {
3187        log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3188    }
3189    uset_close(set1);
3190    uset_close(set2);
3191    uset_close(set3);
3192
3193    /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3194    errorCode=U_ZERO_ERROR;
3195    set1=uset_openPattern(unknownPattern, 14, &errorCode);
3196    set2=uset_openPattern(reservedPattern, 20, &errorCode);
3197    if(U_SUCCESS(errorCode)) {
3198        compareUSets(set1, set2,
3199                     "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3200                     TRUE);
3201    } else {
3202        log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3203    }
3204    uset_close(set1);
3205    uset_close(set2);
3206}
3207
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore no longer included
 * in the data package.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 *
 * While this macro is nonzero, code guarded by "#if !HARDCODED_DATA_4497"
 * is compiled out.
 */
#define HARDCODED_DATA_4497 1
3219
3220/* API coverage for ucase.c */
3221static void TestUCase() {
3222#if !HARDCODED_DATA_4497
3223    UDataMemory *pData;
3224    UCaseProps *csp;
3225    const UCaseProps *ccsp;
3226    UErrorCode errorCode;
3227
3228    /* coverage for ucase_openBinary() */
3229    errorCode=U_ZERO_ERROR;
3230    pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3231    if(U_FAILURE(errorCode)) {
3232        log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3233                    u_errorName(errorCode));
3234        return;
3235    }
3236
3237    csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3238    if(U_FAILURE(errorCode)) {
3239        log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3240                u_errorName(errorCode));
3241        udata_close(pData);
3242        return;
3243    }
3244
3245    if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3246        log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3247    }
3248
3249    ucase_close(csp);
3250    udata_close(pData);
3251
3252    /* coverage for ucase_getDummy() */
3253    errorCode=U_ZERO_ERROR;
3254    ccsp=ucase_getDummy(&errorCode);
3255    if(ucase_tolower(ccsp, 0x41)!=0x41) {
3256        log_err("ucase_tolower(dummy, A)!=A\n");
3257    }
3258#endif
3259}
3260
3261/* API coverage for ubidi_props.c */
3262static void TestUBiDiProps() {
3263#if !HARDCODED_DATA_4497
3264    UDataMemory *pData;
3265    UBiDiProps *bdp;
3266    const UBiDiProps *cbdp;
3267    UErrorCode errorCode;
3268
3269    /* coverage for ubidi_openBinary() */
3270    errorCode=U_ZERO_ERROR;
3271    pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3272    if(U_FAILURE(errorCode)) {
3273        log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3274                    u_errorName(errorCode));
3275        return;
3276    }
3277
3278    bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3279    if(U_FAILURE(errorCode)) {
3280        log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3281                u_errorName(errorCode));
3282        udata_close(pData);
3283        return;
3284    }
3285
3286    if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3287        log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3288    }
3289
3290    ubidi_closeProps(bdp);
3291    udata_close(pData);
3292
3293    /* coverage for ubidi_getDummy() */
3294    errorCode=U_ZERO_ERROR;
3295    cbdp=ubidi_getDummy(&errorCode);
3296    if(ubidi_getClass(cbdp, 0x20)!=0) {
3297        log_err("ubidi_getClass(dummy, space)!=0\n");
3298    }
3299#endif
3300}
3301
3302/* test case folding, compare return values with CaseFolding.txt ------------ */
3303
/*
 * Bit flags recording which case foldings of a character have already
 * been tested; CF_ALL is the union of the three individual flags.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3311
3312static void
3313testFold(UChar32 c, int which,
3314         UChar32 simple, UChar32 turkic,
3315         const UChar *full, int32_t fullLength,
3316         const UChar *turkicFull, int32_t turkicFullLength) {
3317    UChar s[2], t[32];
3318    UChar32 c2;
3319    int32_t length, length2;
3320
3321    UErrorCode errorCode=U_ZERO_ERROR;
3322
3323    length=0;
3324    U16_APPEND_UNSAFE(s, length, c);
3325
3326    if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3327        log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3328    }
3329    if((which&CF_FULL)!=0) {
3330        length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3331        if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3332            log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3333        }
3334    }
3335    if((which&CF_TURKIC)!=0) {
3336        if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3337            log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3338        }
3339
3340        length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3341        if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3342            log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3343        }
3344    }
3345}
3346
3347/* test that c case-folds to itself */
3348static void
3349testFoldToSelf(UChar32 c, int which) {
3350    UChar s[2];
3351    int32_t length;
3352
3353    length=0;
3354    U16_APPEND_UNSAFE(s, length, c);
3355    testFold(c, which, c, c, s, length, s, length);
3356}
3357
3358struct CaseFoldingData {
3359    USet *notSeen;
3360    UChar32 prev, prevSimple;
3361    UChar prevFull[32];
3362    int32_t prevFullLength;
3363    int which;
3364};
3365typedef struct CaseFoldingData CaseFoldingData;
3366
3367static void U_CALLCONV
3368caseFoldingLineFn(void *context,
3369                  char *fields[][2], int32_t fieldCount,
3370                  UErrorCode *pErrorCode) {
3371    CaseFoldingData *pData=(CaseFoldingData *)context;
3372    char *end;
3373    UChar full[32];
3374    UChar32 c, prev, simple;
3375    int32_t count;
3376    int which;
3377    char status;
3378
3379    /* get code point */
3380    const char *s=u_skipWhitespace(fields[0][0]);
3381    if(0==strncmp(s, "0000..10FFFF", 12)) {
3382        /*
3383         * Ignore the line
3384         * # @missing: 0000..10FFFF; C; <code point>
3385         * because maps-to-self is already our default, and this line breaks this parser.
3386         */
3387        return;
3388    }
3389    c=(UChar32)strtoul(s, &end, 16);
3390    end=(char *)u_skipWhitespace(end);
3391    if(end<=fields[0][0] || end!=fields[0][1]) {
3392        log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3393        *pErrorCode=U_PARSE_ERROR;
3394        return;
3395    }
3396
3397    /* get the status of this mapping */
3398    status=*u_skipWhitespace(fields[1][0]);
3399    if(status!='C' && status!='S' && status!='F' && status!='T') {
3400        log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3401        *pErrorCode=U_PARSE_ERROR;
3402        return;
3403    }
3404
3405    /* get the mapping */
3406    count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3407    if(U_FAILURE(*pErrorCode)) {
3408        log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3409        return;
3410    }
3411
3412    /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3413    if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3414        simple=c;
3415    }
3416
3417    if(c!=(prev=pData->prev)) {
3418        /*
3419         * Test remaining mappings for the previous code point.
3420         * If a turkic folding was not mentioned, then it should fold the same
3421         * as the regular simple case folding.
3422         */
3423        UChar prevString[2];
3424        int32_t length;
3425
3426        length=0;
3427        U16_APPEND_UNSAFE(prevString, length, prev);
3428        testFold(prev, (~pData->which)&CF_ALL,
3429                 prev, pData->prevSimple,
3430                 prevString, length,
3431                 pData->prevFull, pData->prevFullLength);
3432        pData->prev=pData->prevSimple=c;
3433        length=0;
3434        U16_APPEND_UNSAFE(pData->prevFull, length, c);
3435        pData->prevFullLength=length;
3436        pData->which=0;
3437    }
3438
3439    /*
3440     * Turn the status into a bit set of case foldings to test.
3441     * Remember non-Turkic case foldings as defaults for Turkic mode.
3442     */
3443    switch(status) {
3444    case 'C':
3445        which=CF_SIMPLE|CF_FULL;
3446        pData->prevSimple=simple;
3447        u_memcpy(pData->prevFull, full, count);
3448        pData->prevFullLength=count;
3449        break;
3450    case 'S':
3451        which=CF_SIMPLE;
3452        pData->prevSimple=simple;
3453        break;
3454    case 'F':
3455        which=CF_FULL;
3456        u_memcpy(pData->prevFull, full, count);
3457        pData->prevFullLength=count;
3458        break;
3459    case 'T':
3460        which=CF_TURKIC;
3461        break;
3462    default:
3463        which=0;
3464        break; /* won't happen because of test above */
3465    }
3466
3467    testFold(c, which, simple, simple, full, count, full, count);
3468
3469    /* remember which case foldings of c have been tested */
3470    pData->which|=which;
3471
3472    /* remove c from the set of ones not mentioned in CaseFolding.txt */
3473    uset_remove(pData->notSeen, c);
3474}
3475
3476static void
3477TestCaseFolding() {
3478    CaseFoldingData data={ NULL };
3479    char *fields[3][2];
3480    UErrorCode errorCode;
3481
3482    static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3483
3484    errorCode=U_ZERO_ERROR;
3485    /* test BMP & plane 1 - nothing interesting above */
3486    data.notSeen=uset_open(0, 0x1ffff);
3487    data.prevFullLength=1; /* length of full case folding of U+0000 */
3488
3489    parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3490    if(U_SUCCESS(errorCode)) {
3491        int32_t i, start, end;
3492
3493        /* add a pseudo-last line to finish testing of the actual last one */
3494        fields[0][0]=lastLine;
3495        fields[0][1]=lastLine+6;
3496        fields[1][0]=lastLine+7;
3497        fields[1][1]=lastLine+9;
3498        fields[2][0]=lastLine+10;
3499        fields[2][1]=lastLine+17;
3500        caseFoldingLineFn(&data, fields, 3, &errorCode);
3501
3502        /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3503        for(i=0;
3504            0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3505                U_SUCCESS(errorCode);
3506            ++i
3507        ) {
3508            do {
3509                testFoldToSelf(start, CF_ALL);
3510            } while(++start<=end);
3511        }
3512    }
3513
3514    uset_close(data.notSeen);
3515}
3516