1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CUCDTST.C
9*
10* Modification History:
11*        Name                     Description
12*     Madhu Katragadda            Ported for C API, added tests for string functions
13********************************************************************************
14*/
15
16#include <string.h>
17#include <math.h>
18#include <stdlib.h>
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/putil.h"
23#include "unicode/ustring.h"
24#include "unicode/uloc.h"
25#include "unicode/unorm2.h"
26
27#include "cintltst.h"
28#include "putilimp.h"
29#include "uparse.h"
30#include "ucase.h"
31#include "ubidi_props.h"
32#include "uprops.h"
33#include "uset_imp.h"
34#include "usc_impl.h"
35#include "udatamem.h" /* for testing ucase_openBinary() */
36#include "cucdapi.h"
37
/*
 * Number of elements of a true array (not a pointer), converted to int32_t.
 * The whole expansion is parenthesized so that the macro behaves like a single
 * primary expression wherever it is used (e.g. as the operand of sizeof).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
39
40/* prototypes --------------------------------------------------------------- */
41
42static void TestUpperLower(void);
43static void TestLetterNumber(void);
44static void TestMisc(void);
45static void TestPOSIX(void);
46static void TestControlPrint(void);
47static void TestIdentifier(void);
48static void TestUnicodeData(void);
49static void TestCodeUnit(void);
50static void TestCodePoint(void);
51static void TestCharLength(void);
52static void TestCharNames(void);
53static void TestMirroring(void);
54static void TestUScriptRunAPI(void);
55static void TestAdditionalProperties(void);
56static void TestNumericProperties(void);
57static void TestPropertyNames(void);
58static void TestPropertyValues(void);
59static void TestConsistency(void);
60static void TestUCase(void);
61static void TestUBiDiProps(void);
62static void TestCaseFolding(void);
63
64/* internal methods used */
65static int32_t MakeProp(char* str);
66static int32_t MakeDir(char* str);
67
68/* helpers ------------------------------------------------------------------ */
69
70static void
71parseUCDFile(const char *filename,
72             char *fields[][2], int32_t fieldCount,
73             UParseLineFn *lineFn, void *context,
74             UErrorCode *pErrorCode) {
75    char path[256];
76    char backupPath[256];
77
78    if(U_FAILURE(*pErrorCode)) {
79        return;
80    }
81
82    /* Look inside ICU_DATA first */
83    strcpy(path, u_getDataDirectory());
84    strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
85    strcat(path, filename);
86
87    /* As a fallback, try to guess where the source data was located
88     *    at the time ICU was built, and look there.
89     */
90    strcpy(backupPath, ctest_dataSrcDir());
91    strcat(backupPath, U_FILE_SEP_STRING);
92    strcat(backupPath, "unidata" U_FILE_SEP_STRING);
93    strcat(backupPath, filename);
94
95    u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
96    if(*pErrorCode==U_FILE_ACCESS_ERROR) {
97        *pErrorCode=U_ZERO_ERROR;
98        u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
99    }
100    if(U_FAILURE(*pErrorCode)) {
101        log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
102    }
103}
104
105/* test data ---------------------------------------------------------------- */
106
/*
 * Two-letter general category abbreviations, concatenated without separators.
 * The abbreviation at character offset 2*k corresponds to tagValues[k].
 * NOTE(review): presumably consumed by MakeProp() (declared above, defined
 * outside this view) - confirm.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
/* General category constants, in the same order as the abbreviations in tagStrings. */
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
141
/*
 * Short names of the bidirectional classes.
 * Every entry is at most three characters plus the terminating NUL and is
 * stored in a fixed five-byte slot.
 * NOTE(review): the array index presumably equals the matching UCharDirection
 * value and the table is presumably used by MakeDir() (defined outside this
 * view) - confirm before reordering.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI",
    "LRI",
    "RLI",
    "PDI"
};
168
169void addUnicodeTest(TestNode** root);
170
171void addUnicodeTest(TestNode** root)
172{
173    addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
174    addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
175    addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
176    addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
177    addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
178    addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
179    addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
180    addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
181    addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
182    addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
183    addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
184    addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
185    addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
186    addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
187    addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
188    addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
189    addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
190    addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
191    addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
192    addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
193    addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
194    addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
195    addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
196    addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
197    addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
198    addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
199}
200
201/*==================================================== */
202/* test u_toupper() and u_tolower()                    */
203/*==================================================== */
204static void TestUpperLower()
205{
206    const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
207    const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
208    U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
209    U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
210    int32_t i;
211
212    U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
213    U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
214
215/*
216Checks LetterLike Symbols which were previously a source of confusion
217[Bertrand A. D. 02/04/98]
218*/
219    for (i=0x2100;i<0x2138;i++)
220    {
221        /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
222        if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
223        {
224            if (i != (int)u_tolower(i)) /* itself */
225                log_err("Failed case conversion with itself: U+%04x\n", i);
226            if (i != (int)u_toupper(i))
227                log_err("Failed case conversion with itself: U+%04x\n", i);
228        }
229    }
230
231    for(i=0; i < u_strlen(upper); i++){
232        if(u_tolower(upper[i]) != lower[i]){
233            log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
234        }
235    }
236
237    log_verbose("testing upper lower\n");
238    for (i = 0; i < 21; i++) {
239
240        if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
241        {
242            log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
243        }
244        else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
245         {
246            log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
247        }
248        else if (upperTest[i] != u_tolower(lowerTest[i]))
249        {
250            log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
251        }
252        else if (lowerTest[i] != u_toupper(upperTest[i]))
253         {
254            log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
255        }
256        else if (upperTest[i] != u_tolower(upperTest[i]))
257        {
258            log_err("Failed case conversion with itself: %c\n", upperTest[i]);
259        }
260        else if (lowerTest[i] != u_toupper(lowerTest[i]))
261        {
262            log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
263        }
264    }
265    log_verbose("done testing upper lower\n");
266
267    log_verbose("testing u_istitle\n");
268    {
269        static const UChar expected[] = {
270            0x1F88,
271            0x1F89,
272            0x1F8A,
273            0x1F8B,
274            0x1F8C,
275            0x1F8D,
276            0x1F8E,
277            0x1F8F,
278            0x1F88,
279            0x1F89,
280            0x1F8A,
281            0x1F8B,
282            0x1F8C,
283            0x1F8D,
284            0x1F8E,
285            0x1F8F,
286            0x1F98,
287            0x1F99,
288            0x1F9A,
289            0x1F9B,
290            0x1F9C,
291            0x1F9D,
292            0x1F9E,
293            0x1F9F,
294            0x1F98,
295            0x1F99,
296            0x1F9A,
297            0x1F9B,
298            0x1F9C,
299            0x1F9D,
300            0x1F9E,
301            0x1F9F,
302            0x1FA8,
303            0x1FA9,
304            0x1FAA,
305            0x1FAB,
306            0x1FAC,
307            0x1FAD,
308            0x1FAE,
309            0x1FAF,
310            0x1FA8,
311            0x1FA9,
312            0x1FAA,
313            0x1FAB,
314            0x1FAC,
315            0x1FAD,
316            0x1FAE,
317            0x1FAF,
318            0x1FBC,
319            0x1FBC,
320            0x1FCC,
321            0x1FCC,
322            0x1FFC,
323            0x1FFC,
324        };
325        int32_t num = sizeof(expected)/sizeof(expected[0]);
326        for(i=0; i<num; i++){
327            if(!u_istitle(expected[i])){
328                log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
329            }
330        }
331
332    }
333}
334
335/* compare two sets and verify that their difference or intersection is empty */
336static UBool
337showADiffB(const USet *a, const USet *b,
338           const char *a_name, const char *b_name,
339           UBool expect, UBool diffIsError) {
340    USet *aa;
341    int32_t i, start, end, length;
342    UErrorCode errorCode;
343
344    /*
345     * expect:
346     * TRUE  -> a-b should be empty, that is, b should contain all of a
347     * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
348     */
349    if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
350        return TRUE;
351    }
352
353    /* clone a to aa because a is const */
354    aa=uset_open(1, 0);
355    if(aa==NULL) {
356        /* unusual problem - out of memory? */
357        return FALSE;
358    }
359    uset_addAll(aa, a);
360
361    /* compute the set in question */
362    if(expect) {
363        /* a-b */
364        uset_removeAll(aa, b);
365    } else {
366        /* a&b */
367        uset_retainAll(aa, b);
368    }
369
370    /* aa is not empty because of the initial tests above; show its contents */
371    errorCode=U_ZERO_ERROR;
372    i=0;
373    for(;;) {
374        length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
375        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
376            break; /* done */
377        }
378        if(U_FAILURE(errorCode)) {
379            log_err("error comparing %s with %s at difference item %d: %s\n",
380                a_name, b_name, i, u_errorName(errorCode));
381            break;
382        }
383        if(length!=0) {
384            break; /* done with code points, got a string or -1 */
385        }
386
387        if(diffIsError) {
388            if(expect) {
389                log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
390            } else {
391                log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
392            }
393        } else {
394            if(expect) {
395                log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
396            } else {
397                log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
398            }
399        }
400
401        ++i;
402    }
403
404    uset_close(aa);
405    return FALSE;
406}
407
408static UBool
409showAMinusB(const USet *a, const USet *b,
410            const char *a_name, const char *b_name,
411            UBool diffIsError) {
412    return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
413}
414
415static UBool
416showAIntersectB(const USet *a, const USet *b,
417                const char *a_name, const char *b_name,
418                UBool diffIsError) {
419    return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
420}
421
422static UBool
423compareUSets(const USet *a, const USet *b,
424             const char *a_name, const char *b_name,
425             UBool diffIsError) {
426    /*
427     * Use an arithmetic & not a logical && so that both branches
428     * are always taken and all differences are shown.
429     */
430    return
431        showAMinusB(a, b, a_name, b_name, diffIsError) &
432        showAMinusB(b, a, b_name, a_name, diffIsError);
433}
434
435/* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
436static void TestLetterNumber()
437{
438    UChar i = 0x0000;
439
440    log_verbose("Testing for isalpha\n");
441    for (i = 0x0041; i < 0x005B; i++) {
442        if (!u_isalpha(i))
443        {
444            log_err("Failed isLetter test at  %.4X\n", i);
445        }
446    }
447    for (i = 0x0660; i < 0x066A; i++) {
448        if (u_isalpha(i))
449        {
450            log_err("Failed isLetter test with numbers at %.4X\n", i);
451        }
452    }
453
454    log_verbose("Testing for isdigit\n");
455    for (i = 0x0660; i < 0x066A; i++) {
456        if (!u_isdigit(i))
457        {
458            log_verbose("Failed isNumber test at %.4X\n", i);
459        }
460    }
461
462    log_verbose("Testing for isalnum\n");
463    for (i = 0x0041; i < 0x005B; i++) {
464        if (!u_isalnum(i))
465        {
466            log_err("Failed isAlNum test at  %.4X\n", i);
467        }
468    }
469    for (i = 0x0660; i < 0x066A; i++) {
470        if (!u_isalnum(i))
471        {
472            log_err("Failed isAlNum test at  %.4X\n", i);
473        }
474    }
475
476    {
477        /*
478         * The following checks work only starting from Unicode 4.0.
479         * Check the version number here.
480         */
481        static UVersionInfo u401={ 4, 0, 1, 0 };
482        UVersionInfo version;
483        u_getUnicodeVersion(version);
484        if(version[0]<4 || 0==memcmp(version, u401, 4)) {
485            return;
486        }
487    }
488
489    {
490        /*
491         * Sanity check:
492         * Verify that exactly the digit characters have decimal digit values.
493         * This assumption is used in the implementation of u_digit()
494         * (which checks nt=de)
495         * compared with the parallel java.lang.Character.digit()
496         * (which checks Nd).
497         *
498         * This was not true in Unicode 3.2 and earlier.
499         * Unicode 4.0 fixed discrepancies.
500         * Unicode 4.0.1 re-introduced problems in this area due to an
501         * unintentionally incomplete last-minute change.
502         */
503        U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
504        U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
505
506        USet *digits, *decimalValues;
507        UErrorCode errorCode;
508
509        U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
510        U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
511        errorCode=U_ZERO_ERROR;
512        digits=uset_openPattern(digitsPattern, 6, &errorCode);
513        decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
514
515        if(U_SUCCESS(errorCode)) {
516            compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
517        }
518
519        uset_close(digits);
520        uset_close(decimalValues);
521    }
522}
523
524static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
525                                const UChar32 *sampleChars, int32_t sampleCharsLength,
526                                UBool expected) {
527    int32_t i;
528    for (i = 0; i < sampleCharsLength; ++i) {
529        UBool result = propFn(sampleChars[i]);
530        if (result != expected) {
531            log_err("error: character property function %s(U+%04x)=%d is wrong\n",
532                    propName, sampleChars[i], result);
533        }
534    }
535}
536
537/* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
538static void TestMisc()
539{
540    static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
541    static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
542    static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
543    static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
544    static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
545    static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
546/*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
547    static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
548    static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
549    static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
550    static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
551
552    static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
553
554    uint32_t mask;
555
556    int32_t i;
557    char icuVersion[U_MAX_VERSION_STRING_LENGTH];
558    UVersionInfo realVersion;
559
560    memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
561
562    testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
563    testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
564
565    testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
566                        sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
567    testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
568                        sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
569
570    testSampleCharProps(u_isWhitespace, "u_isWhitespace",
571                        sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
572    testSampleCharProps(u_isWhitespace, "u_isWhitespace",
573                        sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
574
575    testSampleCharProps(u_isdefined, "u_isdefined",
576                        sampleDefined, LENGTHOF(sampleDefined), TRUE);
577    testSampleCharProps(u_isdefined, "u_isdefined",
578                        sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
579
580    testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
581    testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
582
583    testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
584    testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
585
586    for (i = 0; i < LENGTHOF(sampleDigits); i++) {
587        if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
588            log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
589                    sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
590        }
591    }
592
593    /* Tests the ICU version #*/
594    u_getVersion(realVersion);
595    u_versionToString(realVersion, icuVersion);
596    if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
597    {
598        log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
599    }
600#if defined(ICU_VERSION)
601    /* test only happens where we have configure.in with VERSION - sanity check. */
602    if(strcmp(U_ICU_VERSION, ICU_VERSION))
603    {
604        log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
605    }
606#endif
607
608    /* test U_GC_... */
609    if(
610        U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
611        U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
612        U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
613        U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
614        U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
615        U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
616    ) {
617        log_err("error: U_GET_GC_MASK does not work properly\n");
618    }
619
620    mask=0;
621    mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
622
623    mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
624    mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
625    mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
626    mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
627    mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
628
629    mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
630    mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
631    mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
632
633    mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
634    mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
635    mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
636
637    mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
638    mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
639    mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
640
641    mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
642    mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
643    mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
644    mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
645
646    mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
647    mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
648    mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
649    mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
650    mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
651
652    mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
653    mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
654    mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
655    mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
656
657    mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
658    mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
659
660    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
661        log_err("error: problems with U_GC_XX_MASK constants\n");
662    }
663
664    mask=0;
665    mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
666    mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
667    mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
668    mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
669    mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
670    mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
671    mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
672
673    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
674        log_err("error: problems with U_GC_Y_MASK constants\n");
675    }
676    {
677        static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
678        for(i=0; i<10; i++){
679            if(digit[i]!=u_forDigit(i,10)){
680                log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
681            }
682        }
683    }
684
685    /* test u_digit() */
686    {
687        static const struct {
688            UChar32 c;
689            int8_t radix, value;
690        } data[]={
691            /* base 16 */
692            { 0x0031, 16, 1 },
693            { 0x0038, 16, 8 },
694            { 0x0043, 16, 12 },
695            { 0x0066, 16, 15 },
696            { 0x00e4, 16, -1 },
697            { 0x0662, 16, 2 },
698            { 0x06f5, 16, 5 },
699            { 0xff13, 16, 3 },
700            { 0xff41, 16, 10 },
701
702            /* base 8 */
703            { 0x0031, 8, 1 },
704            { 0x0038, 8, -1 },
705            { 0x0043, 8, -1 },
706            { 0x0066, 8, -1 },
707            { 0x00e4, 8, -1 },
708            { 0x0662, 8, 2 },
709            { 0x06f5, 8, 5 },
710            { 0xff13, 8, 3 },
711            { 0xff41, 8, -1 },
712
713            /* base 36 */
714            { 0x5a, 36, 35 },
715            { 0x7a, 36, 35 },
716            { 0xff3a, 36, 35 },
717            { 0xff5a, 36, 35 },
718
719            /* wrong radix values */
720            { 0x0031, 1, -1 },
721            { 0xff3a, 37, -1 }
722        };
723
724        for(i=0; i<LENGTHOF(data); ++i) {
725            if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
726                log_err("u_digit(U+%04x, %d)=%d expected %d\n",
727                        data[i].c,
728                        data[i].radix,
729                        u_digit(data[i].c, data[i].radix),
730                        data[i].value);
731            }
732        }
733    }
734}
735
736/* test C/POSIX-style functions --------------------------------------------- */
737
/*
 * bit flags
 * One flag per C/POSIX-style class function. The flags are consecutive bits
 * starting at 1, in the same order as the entries of posixClasses[] below:
 * TestPOSIX() starts with mask 1 and shifts it left once per class.
 */
#define ISAL     1      /* u_isalpha */
#define ISLO     2      /* u_islower */
#define ISUP     4      /* u_isupper */

#define ISDI     8      /* u_isdigit */
#define ISXD  0x10      /* u_isxdigit */

#define ISAN  0x20      /* u_isalnum */

#define ISPU  0x40      /* u_ispunct */
#define ISGR  0x80      /* u_isgraph */
#define ISPR 0x100      /* u_isprint */

#define ISSP 0x200      /* u_isspace */
#define ISBL 0x400      /* u_isblank */
#define ISCN 0x800      /* u_iscntrl */

/* C/POSIX-style functions, in the same order as the bit flags */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/* Each class function together with the name used in TestPOSIX() error messages ("u_" is prepended there). */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
776
/*
 * Expected class membership per sample code point.
 * posixResults is the bitwise OR of the IS.. flags of all classes whose
 * function must return TRUE for c; every class whose flag is absent must
 * return FALSE (see TestPOSIX()).
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
816
817static void
818TestPOSIX() {
819    uint32_t mask;
820    int32_t cl, i;
821    UBool expect;
822
823    mask=1;
824    for(cl=0; cl<12; ++cl) {
825        for(i=0; i<LENGTHOF(posixData); ++i) {
826            expect=(UBool)((posixData[i].posixResults&mask)!=0);
827            if(posixClasses[cl].fn(posixData[i].c)!=expect) {
828                log_err("u_%s(U+%04x)=%s is wrong\n",
829                    posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
830            }
831        }
832        mask<<=1;
833    }
834}
835
836/* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
837static void TestControlPrint()
838{
839    const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
840    const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
841    const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
842    const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
843    UChar32 c;
844
845    testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
846    testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
847
848    testSampleCharProps(u_isprint, "u_isprint",
849                        samplePrintable, LENGTHOF(samplePrintable), TRUE);
850    testSampleCharProps(u_isprint, "u_isprint",
851                        sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
852
853    /* test all ISO 8 controls */
854    for(c=0; c<=0x9f; ++c) {
855        if(c==0x20) {
856            /* skip ASCII graphic characters and continue with DEL */
857            c=0x7f;
858        }
859        if(!u_iscntrl(c)) {
860            log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
861        }
862        if(!u_isISOControl(c)) {
863            log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
864        }
865        if(u_isprint(c)) {
866            log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
867        }
868    }
869
870    /* test all Latin-1 graphic characters */
871    for(c=0x20; c<=0xff; ++c) {
872        if(c==0x7f) {
873            c=0xa0;
874        } else if(c==0xad) {
875            /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
876            ++c;
877        }
878        if(!u_isprint(c)) {
879            log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
880        }
881    }
882}
883
884/* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
885static void TestIdentifier()
886{
887    const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
888    const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
889    const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
890    const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
891    const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
892    const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
893    const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
894    const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
895    const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
896    const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
897
898    testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
899                        sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
900    testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
901                        sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
902
903    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
904                        sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
905    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
906                        sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
907
908    /* IDPart should imply IDStart */
909    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
910                        sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
911
912    testSampleCharProps(u_isIDStart, "u_isIDStart",
913                        sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
914    testSampleCharProps(u_isIDStart, "u_isIDStart",
915                        sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
916
917    testSampleCharProps(u_isIDPart, "u_isIDPart",
918                        sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
919    testSampleCharProps(u_isIDPart, "u_isIDPart",
920                        sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
921
922    /* IDPart should imply IDStart */
923    testSampleCharProps(u_isIDPart, "u_isIDPart",
924                        sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
925
926    testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
927                        sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
928    testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
929                        sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
930}
931
932/* for each line of UnicodeData.txt, check some of the properties */
933typedef struct UnicodeDataContext {
934#if UCONFIG_NO_NORMALIZATION
935    const void *dummy;
936#else
937    const UNormalizer2 *nfc;
938    const UNormalizer2 *nfkc;
939#endif
940} UnicodeDataContext;
941
942/*
943 * ### TODO
944 * This test fails incorrectly if the First or Last code point of a repetitive area
945 * is overridden, which is allowed and is encouraged for the PUAs.
946 * Currently, this means that both area First/Last and override lines are
947 * tested against the properties from the API,
948 * and the area boundary will not match and cause an error.
949 *
950 * This function should detect area boundaries and skip them for the test of individual
951 * code points' properties.
952 * Then it should check that the areas contain all the same properties except where overridden.
953 * For this, it would have had to set a flag for which code points were listed explicitly.
954 */
955static void U_CALLCONV
956unicodeDataLineFn(void *context,
957                  char *fields[][2], int32_t fieldCount,
958                  UErrorCode *pErrorCode)
959{
960    char buffer[100];
961    const char *d;
962    char *end;
963    uint32_t value;
964    UChar32 c;
965    int32_t i;
966    int8_t type;
967    int32_t dt;
968    UChar dm[32], s[32];
969    int32_t dmLength, length;
970
971#if !UCONFIG_NO_NORMALIZATION
972    const UNormalizer2 *nfc, *nfkc;
973#endif
974
975    /* get the character code, field 0 */
976    c=strtoul(fields[0][0], &end, 16);
977    if(end<=fields[0][0] || end!=fields[0][1]) {
978        log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
979        return;
980    }
981    if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
982        log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
983        return;
984    }
985
986    /* get general category, field 2 */
987    *fields[2][1]=0;
988    type = (int8_t)tagValues[MakeProp(fields[2][0])];
989    if(u_charType(c)!=type) {
990        log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
991    }
992    if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
993        log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
994    }
995
996    /* get canonical combining class, field 3 */
997    value=strtoul(fields[3][0], &end, 10);
998    if(end<=fields[3][0] || end!=fields[3][1]) {
999        log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1000        return;
1001    }
1002    if(value>255) {
1003        log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1004        return;
1005    }
1006#if !UCONFIG_NO_NORMALIZATION
1007    if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1008        log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1009    }
1010    nfkc=((UnicodeDataContext *)context)->nfkc;
1011    if(value!=unorm2_getCombiningClass(nfkc, c)) {
1012        log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1013    }
1014#endif
1015
1016    /* get BiDi category, field 4 */
1017    *fields[4][1]=0;
1018    i=MakeDir(fields[4][0]);
1019    if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1020        log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1021    }
1022
1023    /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1024    d=NULL;
1025    if(fields[5][0]==fields[5][1]) {
1026        /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1027        if(c==0xac00 || c==0xd7a3) {
1028            dt=U_DT_CANONICAL;
1029        } else {
1030            dt=U_DT_NONE;
1031        }
1032    } else {
1033        d=fields[5][0];
1034        *fields[5][1]=0;
1035        dt=UCHAR_INVALID_CODE;
1036        if(*d=='<') {
1037            end=strchr(++d, '>');
1038            if(end!=NULL) {
1039                *end=0;
1040                dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1041                d=u_skipWhitespace(end+1);
1042            }
1043        } else {
1044            dt=U_DT_CANONICAL;
1045        }
1046    }
1047    if(dt>U_DT_NONE) {
1048        if(c==0xac00) {
1049            dm[0]=0x1100;
1050            dm[1]=0x1161;
1051            dm[2]=0;
1052            dmLength=2;
1053        } else if(c==0xd7a3) {
1054            dm[0]=0xd788;
1055            dm[1]=0x11c2;
1056            dm[2]=0;
1057            dmLength=2;
1058        } else {
1059            dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1060        }
1061    } else {
1062        dmLength=-1;
1063    }
1064    if(dt<0 || U_FAILURE(*pErrorCode)) {
1065        log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1066        return;
1067    }
1068#if !UCONFIG_NO_NORMALIZATION
1069    i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1070    if(i!=dt) {
1071        log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1072    }
1073    /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1074    length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1075    if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1076        log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1077                "or the Decomposition_Mapping is different (%s)\n",
1078                c, length, dmLength, u_errorName(*pErrorCode));
1079        return;
1080    }
1081    /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1082    if(dt!=U_DT_CANONICAL) {
1083        dmLength=-1;
1084    }
1085    nfc=((UnicodeDataContext *)context)->nfc;
1086    length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1087    if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1088        log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1089                "or the Decomposition_Mapping is different (%s)\n",
1090                c, length, dmLength, u_errorName(*pErrorCode));
1091        return;
1092    }
1093    /* recompose */
1094    if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1095        UChar32 a, b, composite;
1096        i=0;
1097        U16_NEXT(dm, i, dmLength, a);
1098        U16_NEXT(dm, i, dmLength, b);
1099        /* i==dmLength */
1100        composite=unorm2_composePair(nfc, a, b);
1101        if(composite!=c) {
1102            log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1103                    (long)c, (long)a, (long)b, (long)composite);
1104        }
1105        /*
1106         * Note: NFKC has fewer round-trip mappings than NFC,
1107         * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1108         */
1109    }
1110#endif
1111
1112    /* get ISO Comment, field 11 */
1113    *fields[11][1]=0;
1114    i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1115    if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1116        log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1117            c, u_errorName(*pErrorCode),
1118            U_FAILURE(*pErrorCode) ? buffer : "[error]",
1119            fields[11][0]);
1120    }
1121
1122    /* get uppercase mapping, field 12 */
1123    if(fields[12][0]!=fields[12][1]) {
1124        value=strtoul(fields[12][0], &end, 16);
1125        if(end!=fields[12][1]) {
1126            log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1127            return;
1128        }
1129        if((UChar32)value!=u_toupper(c)) {
1130            log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1131        }
1132    } else {
1133        /* no case mapping: the API must map the code point to itself */
1134        if(c!=u_toupper(c)) {
1135            log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1136        }
1137    }
1138
1139    /* get lowercase mapping, field 13 */
1140    if(fields[13][0]!=fields[13][1]) {
1141        value=strtoul(fields[13][0], &end, 16);
1142        if(end!=fields[13][1]) {
1143            log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1144            return;
1145        }
1146        if((UChar32)value!=u_tolower(c)) {
1147            log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1148        }
1149    } else {
1150        /* no case mapping: the API must map the code point to itself */
1151        if(c!=u_tolower(c)) {
1152            log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1153        }
1154    }
1155
1156    /* get titlecase mapping, field 14 */
1157    if(fields[14][0]!=fields[14][1]) {
1158        value=strtoul(fields[14][0], &end, 16);
1159        if(end!=fields[14][1]) {
1160            log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1161            return;
1162        }
1163        if((UChar32)value!=u_totitle(c)) {
1164            log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1165        }
1166    } else {
1167        /* no case mapping: the API must map the code point to itself */
1168        if(c!=u_totitle(c)) {
1169            log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1170        }
1171    }
1172}
1173
1174static UBool U_CALLCONV
1175enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1176    static const UChar32 test[][2]={
1177        {0x41, U_UPPERCASE_LETTER},
1178        {0x308, U_NON_SPACING_MARK},
1179        {0xfffe, U_GENERAL_OTHER_TYPES},
1180        {0xe0041, U_FORMAT_CHAR},
1181        {0xeffff, U_UNASSIGNED}
1182    };
1183
1184    int32_t i, count;
1185
1186    if(0!=strcmp((const char *)context, "a1")) {
1187        log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1188        return FALSE;
1189    }
1190
1191    count=LENGTHOF(test);
1192    for(i=0; i<count; ++i) {
1193        if(start<=test[i][0] && test[i][0]<limit) {
1194            if(type!=(UCharCategory)test[i][1]) {
1195                log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1196                        start, limit, (long)type, test[i][0], test[i][1]);
1197            }
1198            /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1199            return i==(count-1) ? FALSE : TRUE;
1200        }
1201    }
1202
1203    if(start>test[count-1][0]) {
1204        log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1205                start, limit, (long)type);
1206        return FALSE;
1207    }
1208
1209    return TRUE;
1210}
1211
1212static UBool U_CALLCONV
1213enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1214    /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1215    static const int32_t defaultBidi[][2]={ /* { limit, class } */
1216        { 0x0590, U_LEFT_TO_RIGHT },
1217        { 0x0600, U_RIGHT_TO_LEFT },
1218        { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1219        { 0x08A0, U_RIGHT_TO_LEFT },
1220        { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1221        { 0x20A0, U_LEFT_TO_RIGHT },
1222        { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1223        { 0xFB1D, U_LEFT_TO_RIGHT },
1224        { 0xFB50, U_RIGHT_TO_LEFT },
1225        { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1226        { 0xFE70, U_LEFT_TO_RIGHT },
1227        { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1228        { 0x10800, U_LEFT_TO_RIGHT },
1229        { 0x11000, U_RIGHT_TO_LEFT },
1230        { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1231        { 0x1EE00, U_RIGHT_TO_LEFT },
1232        { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1233        { 0x1F000, U_RIGHT_TO_LEFT },
1234        { 0x110000, U_LEFT_TO_RIGHT }
1235    };
1236
1237    UChar32 c;
1238    int32_t i;
1239    UCharDirection shouldBeDir;
1240
1241    /*
1242     * LineBreak.txt specifies:
1243     *   #  - Assigned characters that are not listed explicitly are given the value
1244     *   #    "AL".
1245     *   #  - Unassigned characters are given the value "XX".
1246     *
1247     * PUA characters are listed explicitly with "XX".
1248     * Verify that no assigned character has "XX".
1249     */
1250    if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1251        c=start;
1252        while(c<limit) {
1253            if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1254                log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1255            }
1256            ++c;
1257        }
1258    }
1259
1260    /*
1261     * Verify default Bidi classes.
1262     * For recent Unicode versions, see UCD.html.
1263     *
1264     * For older Unicode versions:
1265     * See table 3-7 "Bidirectional Character Types" in UAX #9.
1266     * http://www.unicode.org/reports/tr9/
1267     *
1268     * See also DerivedBidiClass.txt for Cn code points!
1269     *
1270     * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1271     * changed some default values.
1272     * In particular, non-characters and unassigned Default Ignorable Code Points
1273     * change from L to BN.
1274     *
1275     * UCD.html version 4.0.1 does not yet reflect these changes.
1276     */
1277    if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1278        /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1279        c=start;
1280        for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1281            if((int32_t)c<defaultBidi[i][0]) {
1282                while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1283                    if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1284                        shouldBeDir=U_BOUNDARY_NEUTRAL;
1285                    } else {
1286                        shouldBeDir=(UCharDirection)defaultBidi[i][1];
1287                    }
1288
1289                    if( u_charDirection(c)!=shouldBeDir ||
1290                        u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1291                    ) {
1292                        log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1293                            c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1294                    }
1295                    ++c;
1296                }
1297            }
1298        }
1299    }
1300
1301    return TRUE;
1302}
1303
1304/* tests for several properties */
1305static void TestUnicodeData()
1306{
1307    UVersionInfo expectVersionArray;
1308    UVersionInfo versionArray;
1309    char *fields[15][2];
1310    UErrorCode errorCode;
1311    UChar32 c;
1312    int8_t type;
1313
1314    UnicodeDataContext context;
1315
1316    u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1317    u_getUnicodeVersion(versionArray);
1318    if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1319    {
1320        log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1321        versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1322    }
1323
1324#if defined(ICU_UNICODE_VERSION)
1325    /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1326    if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1327    {
1328         log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1329    }
1330#endif
1331
1332    if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1333        log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1334    }
1335
1336    errorCode=U_ZERO_ERROR;
1337#if !UCONFIG_NO_NORMALIZATION
1338    context.nfc=unorm2_getNFCInstance(&errorCode);
1339    context.nfkc=unorm2_getNFKCInstance(&errorCode);
1340    if(U_FAILURE(errorCode)) {
1341        log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1342        return;
1343    }
1344#endif
1345    parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1346    if(U_FAILURE(errorCode)) {
1347        return; /* if we couldn't parse UnicodeData.txt, we should return */
1348    }
1349
1350    /* sanity check on repeated properties */
1351    for(c=0xfffe; c<=0x10ffff;) {
1352        type=u_charType(c);
1353        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1354            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1355        }
1356        if(type!=U_UNASSIGNED) {
1357            log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1358        }
1359        if((c&0xffff)==0xfffe) {
1360            ++c;
1361        } else {
1362            c+=0xffff;
1363        }
1364    }
1365
1366    /* test that PUA is not "unassigned" */
1367    for(c=0xe000; c<=0x10fffd;) {
1368        type=u_charType(c);
1369        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1370            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1371        }
1372        if(type==U_UNASSIGNED) {
1373            log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1374        } else if(type!=U_PRIVATE_USE_CHAR) {
1375            log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1376        }
1377        if(c==0xf8ff) {
1378            c=0xf0000;
1379        } else if(c==0xffffd) {
1380            c=0x100000;
1381        } else {
1382            ++c;
1383        }
1384    }
1385
1386    /* test u_enumCharTypes() */
1387    u_enumCharTypes(enumTypeRange, "a1");
1388
1389    /* check default properties */
1390    u_enumCharTypes(enumDefaultsRange, NULL);
1391}
1392
1393static void TestCodeUnit(){
1394    const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1395
1396    int32_t i;
1397
1398    for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1399        UChar c=codeunit[i];
1400        if(i<4){
1401            if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1402                log_err("ERROR: U+%04x is a single", c);
1403            }
1404
1405        }
1406        if(i >= 4 && i< 8){
1407            if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1408                log_err("ERROR: U+%04x is a first surrogate", c);
1409            }
1410        }
1411        if(i >= 8 && i< 12){
1412            if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1413                log_err("ERROR: U+%04x is a second surrogate", c);
1414            }
1415        }
1416    }
1417
1418}
1419
1420static void TestCodePoint(){
1421    const UChar32 codePoint[]={
1422        /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1423        0xd800,
1424        0xdbff,
1425        0xdc00,
1426        0xdfff,
1427        0xdc04,
1428        0xd821,
1429        /*not a surrogate, valid, isUnicodeChar , not Error*/
1430        0x20ac,
1431        0xd7ff,
1432        0xe000,
1433        0xe123,
1434        0x0061,
1435        0xe065,
1436        0x20402,
1437        0x24506,
1438        0x23456,
1439        0x20402,
1440        0x10402,
1441        0x23456,
1442        /*not a surrogate, not valid, isUnicodeChar, isError */
1443        0x0015,
1444        0x009f,
1445        /*not a surrogate, not valid, not isUnicodeChar, isError */
1446        0xffff,
1447        0xfffe,
1448    };
1449    int32_t i;
1450    for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1451        UChar32 c=codePoint[i];
1452        if(i<6){
1453            if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1454                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1455            }
1456            if(UTF_IS_VALID(c)){
1457                log_err("ERROR: isValid() failed for U+%04x\n", c);
1458            }
1459            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1460                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1461            }
1462            if(UTF_IS_ERROR(c)){
1463                log_err("ERROR: isError() failed for U+%04x\n", c);
1464            }
1465        }else if(i >=6 && i<18){
1466            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1467                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1468            }
1469            if(!UTF_IS_VALID(c)){
1470                log_err("ERROR: isValid() failed for U+%04x\n", c);
1471            }
1472            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1473                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1474            }
1475            if(UTF_IS_ERROR(c)){
1476                log_err("ERROR: isError() failed for U+%04x\n", c);
1477            }
1478        }else if(i >=18 && i<20){
1479            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1480                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1481            }
1482            if(UTF_IS_VALID(c)){
1483                log_err("ERROR: isValid() failed for U+%04x\n", c);
1484            }
1485            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1486                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1487            }
1488            if(!UTF_IS_ERROR(c)){
1489                log_err("ERROR: isError() failed for U+%04x\n", c);
1490            }
1491        }
1492        else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1493            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1494                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1495            }
1496            if(UTF_IS_VALID(c)){
1497                log_err("ERROR: isValid() failed for U+%04x\n", c);
1498            }
1499            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1500                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1501            }
1502            if(!UTF_IS_ERROR(c)){
1503                log_err("ERROR: isError() failed for U+%04x\n", c);
1504            }
1505        }
1506    }
1507
1508    if(
1509        !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1510        !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1511        U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1512        U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1513    ) {
1514        log_err("error with U_IS_BMP()\n");
1515    }
1516
1517    if(
1518        U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1519        U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1520        U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1521        !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1522    ) {
1523        log_err("error with U_IS_SUPPLEMENTARY()\n");
1524    }
1525}
1526
1527static void TestCharLength()
1528{
1529    const int32_t codepoint[]={
1530        1, 0x0061,
1531        1, 0xe065,
1532        1, 0x20ac,
1533        2, 0x20402,
1534        2, 0x23456,
1535        2, 0x24506,
1536        2, 0x20402,
1537        2, 0x10402,
1538        1, 0xd7ff,
1539        1, 0xe000
1540    };
1541
1542    int32_t i;
1543    UBool multiple;
1544    for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1545        UChar32 c=codepoint[i+1];
1546        if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1547            log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1548        }
1549        multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1550        if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1551            log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1552        }
1553    }
1554}
1555
1556/*internal functions ----*/
1557static int32_t MakeProp(char* str)
1558{
1559    int32_t result = 0;
1560    char* matchPosition =0;
1561
1562    matchPosition = strstr(tagStrings, str);
1563    if (matchPosition == 0)
1564    {
1565        log_err("unrecognized type letter ");
1566        log_err(str);
1567    }
1568    else
1569        result = (int32_t)((matchPosition - tagStrings) / 2);
1570    return result;
1571}
1572
1573static int32_t MakeDir(char* str)
1574{
1575    int32_t pos = 0;
1576    for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
1577        if (strcmp(str, dirStrings[pos]) == 0) {
1578            return pos;
1579        }
1580    }
1581    return -1;
1582}
1583
1584/* test u_charName() -------------------------------------------------------- */
1585
/*
 * Expected character names for a handful of code points.
 * enumCharNamesFn() compares against name for U_UNICODE_CHAR_NAME, oldName for
 * U_UNICODE_10_CHAR_NAME, extName for U_EXTENDED_CHAR_NAME and alias for
 * U_CHAR_NAME_ALIAS. alias is NULL where no alias is listed.
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName, *alias;
} names[]={
    {0x0061,
        "LATIN SMALL LETTER A", "",
        "LATIN SMALL LETTER A",
        NULL},
    {0x01a2,
        "LATIN CAPITAL LETTER OI", "",
        "LATIN CAPITAL LETTER OI",
        "LATIN CAPITAL LETTER GHA"},
    {0x0284,
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
        "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
        NULL},
    {0x0fd0,
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
        "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
        "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
    {0x3401,
        "CJK UNIFIED IDEOGRAPH-3401", "",
        "CJK UNIFIED IDEOGRAPH-3401",
        NULL},
    {0x7fed,
        "CJK UNIFIED IDEOGRAPH-7FED", "",
        "CJK UNIFIED IDEOGRAPH-7FED",
        NULL},
    {0xac00,
        "HANGUL SYLLABLE GA", "",
        "HANGUL SYLLABLE GA",
        NULL},
    {0xd7a3,
        "HANGUL SYLLABLE HIH", "",
        "HANGUL SYLLABLE HIH",
        NULL},
    /* surrogates and noncharacters only have an extended name */
    {0xd800,
        "", "",
        "<lead surrogate-D800>",
        NULL},
    {0xdc00,
        "", "",
        "<trail surrogate-DC00>",
        NULL},
    {0xff08,
        "FULLWIDTH LEFT PARENTHESIS", "",
        "FULLWIDTH LEFT PARENTHESIS",
        NULL},
    {0xffe5,
        "FULLWIDTH YEN SIGN", "",
        "FULLWIDTH YEN SIGN",
        NULL},
    {0xffff,
        "", "",
        "<noncharacter-FFFF>",
        NULL},
    {0x1d0c5,
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
        "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
        "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
    {0x23456,
        "CJK UNIFIED IDEOGRAPH-23456", "",
        "CJK UNIFIED IDEOGRAPH-23456",
        NULL}
};
1613
1614static UBool
1615enumCharNamesFn(void *context,
1616                UChar32 code, UCharNameChoice nameChoice,
1617                const char *name, int32_t length) {
1618    int32_t *pCount=(int32_t *)context;
1619    const char *expected;
1620    int i;
1621
1622    if(length<=0 || length!=(int32_t)strlen(name)) {
1623        /* should not be called with an empty string or invalid length */
1624        log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1625        return TRUE;
1626    }
1627
1628    ++*pCount;
1629    for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1630        if(code==(UChar32)names[i].code) {
1631            switch (nameChoice) {
1632                case U_EXTENDED_CHAR_NAME:
1633                    if(0!=strcmp(name, names[i].extName)) {
1634                        log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1635                    }
1636                    break;
1637                case U_UNICODE_CHAR_NAME:
1638                    if(0!=strcmp(name, names[i].name)) {
1639                        log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1640                    }
1641                    break;
1642                case U_UNICODE_10_CHAR_NAME:
1643                    expected=names[i].oldName;
1644                    if(expected[0]==0 || 0!=strcmp(name, expected)) {
1645                        log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1646                    }
1647                    break;
1648                case U_CHAR_NAME_ALIAS:
1649                    expected=names[i].alias;
1650                    if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1651                        log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1652                    }
1653                    break;
1654                case U_CHAR_NAME_CHOICE_COUNT:
1655                    break;
1656            }
1657            break;
1658        }
1659    }
1660    return TRUE;
1661}
1662
/*
 * Context that TestCharNames() passes to enumExtCharNamesFn() through
 * u_enumCharNames().
 */
struct enumExtCharNamesContext {
    /*
     * Number of names counted so far. enumExtCharNamesFn() passes &length
     * to enumCharNamesFn(), which increments it through an int32_t pointer,
     * so the field is declared with exactly that type.
     */
    int32_t length;
    /* code point of the previous callback; TestCharNames() starts with -1 */
    int32_t last;
};
1667
1668static UBool
1669enumExtCharNamesFn(void *context,
1670                UChar32 code, UCharNameChoice nameChoice,
1671                const char *name, int32_t length) {
1672    struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1673
1674    if (ecncp->last != (int32_t) code - 1) {
1675        if (ecncp->last < 0) {
1676            log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1677        } else {
1678            log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1679        }
1680    }
1681    ecncp->last = (int32_t) code;
1682
1683    if (!*name) {
1684        log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1685    }
1686
1687    return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1688}
1689
1690/**
1691 * This can be made more efficient by moving it into putil.c and having
1692 * it directly access the ebcdic translation tables.
1693 * TODO: If we get this method in putil.c, then delete it from here.
1694 */
1695static UChar
1696u_charToUChar(char c) {
1697    UChar uc;
1698    u_charsToUChars(&c, &uc, 1);
1699    return uc;
1700}
1701
1702static void
1703TestCharNames() {
1704    static char name[80];
1705    UErrorCode errorCode=U_ZERO_ERROR;
1706    struct enumExtCharNamesContext extContext;
1707    const char *expected;
1708    int32_t length;
1709    UChar32 c;
1710    int32_t i;
1711
1712    log_verbose("Testing uprv_getMaxCharNameLength()\n");
1713    length=uprv_getMaxCharNameLength();
1714    if(length==0) {
1715        /* no names data available */
1716        return;
1717    }
1718    if(length<83) { /* Unicode 3.2 max char name length */
1719        log_err("uprv_getMaxCharNameLength()=%d is too short");
1720    }
1721    /* ### TODO same tests for max ISO comment length as for max name length */
1722
1723    log_verbose("Testing u_charName()\n");
1724    for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1725        /* modern Unicode character name */
1726        length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1727        if(U_FAILURE(errorCode)) {
1728            log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1729            return;
1730        }
1731        if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1732            log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1733        }
1734
1735        /* find the modern name */
1736        if (*names[i].name) {
1737            c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1738            if(U_FAILURE(errorCode)) {
1739                log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1740                return;
1741            }
1742            if(c!=(UChar32)names[i].code) {
1743                log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1744            }
1745        }
1746
1747        /* Unicode 1.0 character name */
1748        length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1749        if(U_FAILURE(errorCode)) {
1750            log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1751            return;
1752        }
1753        if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1754            log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1755        }
1756
1757        /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1758        if(names[i].oldName[0]!=0 /* && length>0 */) {
1759            c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1760            if(U_FAILURE(errorCode)) {
1761                log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1762                return;
1763            }
1764            if(c!=(UChar32)names[i].code) {
1765                log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1766            }
1767        }
1768
1769        /* Unicode character name alias */
1770        length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1771        if(U_FAILURE(errorCode)) {
1772            log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1773            return;
1774        }
1775        expected=names[i].alias;
1776        if(expected==NULL) {
1777            expected="";
1778        }
1779        if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1780            log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1781                    names[i].code, name, length, expected);
1782        }
1783
1784        /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1785        if(expected[0]!=0 /* && length>0 */) {
1786            c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1787            if(U_FAILURE(errorCode)) {
1788                log_err("u_charFromName(%s - alias) error %s\n",
1789                        expected, u_errorName(errorCode));
1790                return;
1791            }
1792            if(c!=(UChar32)names[i].code) {
1793                log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1794                        expected, c, names[i].code);
1795            }
1796        }
1797    }
1798
1799    /* test u_enumCharNames() */
1800    length=0;
1801    errorCode=U_ZERO_ERROR;
1802    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1803    if(U_FAILURE(errorCode) || length<94140) {
1804        log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1805    }
1806
1807    extContext.length = 0;
1808    extContext.last = -1;
1809    errorCode=U_ZERO_ERROR;
1810    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1811    if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1812        log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1813    }
1814
1815    /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1816    if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1817        log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1818    }
1819
1820    /* Test getCharNameCharacters */
1821    if(!getTestOption(QUICK_OPTION)) {
1822        enum { BUFSIZE = 256 };
1823        UErrorCode ec = U_ZERO_ERROR;
1824        char buf[BUFSIZE];
1825        int32_t maxLength;
1826        UChar32 cp;
1827        UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1828        int32_t l1, l2;
1829        UBool map[256];
1830        UBool ok;
1831
1832        USet* set = uset_open(1, 0); /* empty set */
1833        USet* dumb = uset_open(1, 0); /* empty set */
1834
1835        /*
1836         * uprv_getCharNameCharacters() will likely return more lowercase
1837         * letters than actual character names contain because
1838         * it includes all the characters in lowercased names of
1839         * general categories, for the full possible set of extended names.
1840         */
1841        {
1842            USetAdder sa={
1843                NULL,
1844                uset_add,
1845                uset_addRange,
1846                uset_addString,
1847                NULL /* don't need remove() */
1848            };
1849            sa.set=set;
1850            uprv_getCharNameCharacters(&sa);
1851        }
1852
1853        /* build set the dumb (but sure-fire) way */
1854        for (i=0; i<256; ++i) {
1855            map[i] = FALSE;
1856        }
1857
1858        maxLength=0;
1859        for (cp=0; cp<0x110000; ++cp) {
1860            int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1861                                     buf, BUFSIZE, &ec);
1862            if (U_FAILURE(ec)) {
1863                log_err("FAIL: u_charName failed when it shouldn't\n");
1864                uset_close(set);
1865                uset_close(dumb);
1866                return;
1867            }
1868            if(len>maxLength) {
1869                maxLength=len;
1870            }
1871
1872            for (i=0; i<len; ++i) {
1873                if (!map[(uint8_t) buf[i]]) {
1874                    uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1875                    map[(uint8_t) buf[i]] = TRUE;
1876                }
1877            }
1878
1879            /* test for leading/trailing whitespace */
1880            if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1881                log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1882            }
1883        }
1884
1885        if(map[(uint8_t)'\t']) {
1886            log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1887        }
1888
1889        length=uprv_getMaxCharNameLength();
1890        if(length!=maxLength) {
1891            log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1892                    length, maxLength);
1893        }
1894
1895        /* compare the sets.  Where is my uset_equals?!! */
1896        ok=TRUE;
1897        for(i=0; i<256; ++i) {
1898            if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1899                if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1900                    /* ignore lowercase a-z that are in set but not in dumb */
1901                    ok=TRUE;
1902                } else {
1903                    ok=FALSE;
1904                    break;
1905                }
1906            }
1907        }
1908
1909        l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1910        l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1911        if (U_FAILURE(ec)) {
1912            log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1913            uset_close(set);
1914            uset_close(dumb);
1915            return;
1916        }
1917
1918        if (l1 >= BUFSIZE) {
1919            l1 = BUFSIZE-1;
1920            pat[l1] = 0;
1921        }
1922        if (l2 >= BUFSIZE) {
1923            l2 = BUFSIZE-1;
1924            dumbPat[l2] = 0;
1925        }
1926
1927        if (!ok) {
1928            log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1929                    aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1930        } else if(getTestOption(VERBOSITY_OPTION)) {
1931            log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1932        }
1933
1934        uset_close(set);
1935        uset_close(dumb);
1936    }
1937
1938    /* ### TODO: test error cases and other interesting things */
1939}
1940
1941/* test u_isMirrored() and u_charMirror() ----------------------------------- */
1942
1943static void
1944TestMirroring() {
1945    USet *set;
1946    UErrorCode errorCode;
1947
1948    UChar32 start, end, c2, c3;
1949    int32_t i;
1950
1951    U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1952
1953    U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1954
1955    log_verbose("Testing u_isMirrored()\n");
1956    if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1957         !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1958        )
1959    ) {
1960        log_err("u_isMirrored() does not work correctly\n");
1961    }
1962
1963    log_verbose("Testing u_charMirror()\n");
1964    if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1965         u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1966         u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1967         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1968         u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1969         )
1970    ) {
1971        log_err("u_charMirror() does not work correctly\n");
1972    }
1973
1974    /* verify that Bidi_Mirroring_Glyph roundtrips */
1975    errorCode=U_ZERO_ERROR;
1976    set=uset_openPattern(mirroredPattern, 17, &errorCode);
1977
1978    if (U_FAILURE(errorCode)) {
1979        log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
1980    } else {
1981        for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1982            do {
1983                c2=u_charMirror(start);
1984                c3=u_charMirror(c2);
1985                if(c3!=start) {
1986                    log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1987                }
1988                c3=u_getBidiPairedBracket(start);
1989                if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
1990                    if(c3!=start) {
1991                        log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
1992                                (long)start);
1993                    }
1994                } else {
1995                    if(c3!=c2) {
1996                        log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
1997                                (long)start, (long)c2);
1998                    }
1999                }
2000            } while(++start<=end);
2001        }
2002    }
2003
2004    uset_close(set);
2005}
2006
2007
2008struct RunTestData
2009{
2010    const char *runText;
2011    UScriptCode runCode;
2012};
2013
2014typedef struct RunTestData RunTestData;
2015
2016static void
2017CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2018                const char *prefix)
2019{
2020    int32_t run, runStart, runLimit;
2021    UScriptCode runCode;
2022
2023    /* iterate over all the runs */
2024    run = 0;
2025    while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2026        if (runStart != runStarts[run]) {
2027            log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2028                prefix, run, runStarts[run], runStart);
2029        }
2030
2031        if (runLimit != runStarts[run + 1]) {
2032            log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2033                prefix, run, runStarts[run + 1], runLimit);
2034        }
2035
2036        if (runCode != testData[run].runCode) {
2037            log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2038                prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2039        }
2040
2041        run += 1;
2042
2043        /* stop when we've seen all the runs we expect to see */
2044        if (run >= nRuns) {
2045            break;
2046        }
2047    }
2048
2049    /* Complain if we didn't see then number of runs we expected */
2050    if (run != nRuns) {
2051        log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2052    }
2053}
2054
2055static void
2056TestUScriptRunAPI()
2057{
2058    static const RunTestData testData1[] = {
2059        {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2060        {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2061        {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2062        {"English (", USCRIPT_LATIN},
2063        {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2064        {") ", USCRIPT_LATIN},
2065        {"\\u6F22\\u5B75", USCRIPT_HAN},
2066        {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2067        {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2068        {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2069    };
2070
2071    static const RunTestData testData2[] = {
2072       {"((((((((((abc))))))))))", USCRIPT_LATIN}
2073    };
2074
2075    static const struct {
2076      const RunTestData *testData;
2077      int32_t nRuns;
2078    } testDataEntries[] = {
2079        {testData1, LENGTHOF(testData1)},
2080        {testData2, LENGTHOF(testData2)}
2081    };
2082
2083    static const int32_t nTestEntries = LENGTHOF(testDataEntries);
2084    int32_t testEntry;
2085
2086    for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2087        UChar testString[1024];
2088        int32_t runStarts[256];
2089        int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2090        const RunTestData *testData = testDataEntries[testEntry].testData;
2091
2092        int32_t run, stringLimit;
2093        UScriptRun *scriptRun = NULL;
2094        UErrorCode err;
2095
2096        /*
2097         * Fill in the test string and the runStarts array.
2098         */
2099        stringLimit = 0;
2100        for (run = 0; run < nTestRuns; run += 1) {
2101            runStarts[run] = stringLimit;
2102            stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2103            /*stringLimit -= 1;*/
2104        }
2105
2106        /* The limit of the last run */
2107        runStarts[nTestRuns] = stringLimit;
2108
2109        /*
2110         * Make sure that calling uscript_OpenRun with a NULL text pointer
2111         * and a non-zero text length returns the correct error.
2112         */
2113        err = U_ZERO_ERROR;
2114        scriptRun = uscript_openRun(NULL, stringLimit, &err);
2115
2116        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2117            log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2118        }
2119
2120        if (scriptRun != NULL) {
2121            log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2122            uscript_closeRun(scriptRun);
2123        }
2124
2125        /*
2126         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2127         * and a zero text length returns the correct error.
2128         */
2129        err = U_ZERO_ERROR;
2130        scriptRun = uscript_openRun(testString, 0, &err);
2131
2132        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2133            log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2134        }
2135
2136        if (scriptRun != NULL) {
2137            log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2138            uscript_closeRun(scriptRun);
2139        }
2140
2141        /*
2142         * Make sure that calling uscript_openRun with a NULL text pointer
2143         * and a zero text length doesn't return an error.
2144         */
2145        err = U_ZERO_ERROR;
2146        scriptRun = uscript_openRun(NULL, 0, &err);
2147
2148        if (U_FAILURE(err)) {
2149            log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2150        }
2151
2152        /* Make sure that the empty iterator doesn't find any runs */
2153        if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2154            log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2155        }
2156
2157        /*
2158         * Make sure that calling uscript_setRunText with a NULL text pointer
2159         * and a non-zero text length returns the correct error.
2160         */
2161        err = U_ZERO_ERROR;
2162        uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2163
2164        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2165            log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2166        }
2167
2168        /*
2169         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2170         * and a zero text length returns the correct error.
2171         */
2172        err = U_ZERO_ERROR;
2173        uscript_setRunText(scriptRun, testString, 0, &err);
2174
2175        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2176            log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2177        }
2178
2179        /*
2180         * Now call uscript_setRunText on the empty iterator
2181         * and make sure that it works.
2182         */
2183        err = U_ZERO_ERROR;
2184        uscript_setRunText(scriptRun, testString, stringLimit, &err);
2185
2186        if (U_FAILURE(err)) {
2187            log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2188        } else {
2189            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2190        }
2191
2192        uscript_closeRun(scriptRun);
2193
2194        /*
2195         * Now open an interator over the testString
2196         * using uscript_openRun and make sure that it works
2197         */
2198        scriptRun = uscript_openRun(testString, stringLimit, &err);
2199
2200        if (U_FAILURE(err)) {
2201            log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2202        } else {
2203            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2204        }
2205
2206        /* Now reset the iterator, and make sure
2207         * that it still works.
2208         */
2209        uscript_resetRun(scriptRun);
2210
2211        CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2212
2213        /* Close the iterator */
2214        uscript_closeRun(scriptRun);
2215    }
2216}
2217
2218/* test additional, non-core properties */
2219static void
2220TestAdditionalProperties() {
2221    /* test data for u_charAge() */
2222    static const struct {
2223        UChar32 c;
2224        UVersionInfo version;
2225    } charAges[]={
2226        {0x41,    { 1, 1, 0, 0 }},
2227        {0xffff,  { 1, 1, 0, 0 }},
2228        {0x20ab,  { 2, 0, 0, 0 }},
2229        {0x2fffe, { 2, 0, 0, 0 }},
2230        {0x20ac,  { 2, 1, 0, 0 }},
2231        {0xfb1d,  { 3, 0, 0, 0 }},
2232        {0x3f4,   { 3, 1, 0, 0 }},
2233        {0x10300, { 3, 1, 0, 0 }},
2234        {0x220,   { 3, 2, 0, 0 }},
2235        {0xff60,  { 3, 2, 0, 0 }}
2236    };
2237
2238    /* test data for u_hasBinaryProperty() */
2239    static const int32_t
2240    props[][3]={ /* code point, property, value */
2241        { 0x0627, UCHAR_ALPHABETIC, TRUE },
2242        { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2243        { 0x2028, UCHAR_ALPHABETIC, FALSE },
2244
2245        { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2246        { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2247
2248        { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2249        { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2250
2251        { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2252        { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2253
2254        /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2255        { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2256        { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2257        { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2258        { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2259
2260        { 0x058a, UCHAR_DASH, TRUE },
2261        { 0x007e, UCHAR_DASH, FALSE },
2262
2263        { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2264        { 0x3000, UCHAR_DIACRITIC, FALSE },
2265
2266        { 0x0e46, UCHAR_EXTENDER, TRUE },
2267        { 0x0020, UCHAR_EXTENDER, FALSE },
2268
2269#if !UCONFIG_NO_NORMALIZATION
2270        { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2271        { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2272        { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2273
2274        { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2275        { 0x0308, UCHAR_NFD_INERT, FALSE },
2276
2277        { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2278        { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2279
2280        { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2281        { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2282        { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2283        { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2284        { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2285        { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2286
2287        { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2288        { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2289
2290        { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2291        { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2292        { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2293        { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2294        { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2295        { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2296#endif
2297
2298        { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2299        { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2300        { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2301
2302        { 0x30fb, UCHAR_HYPHEN, TRUE },
2303        { 0xfe58, UCHAR_HYPHEN, FALSE },
2304
2305        { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2306        { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2307        { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2308
2309        { 0x2172, UCHAR_ID_START, TRUE },
2310        { 0x007a, UCHAR_ID_START, TRUE },
2311        { 0x0039, UCHAR_ID_START, FALSE },
2312
2313        { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2314        { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2315        { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2316
2317        { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2318        { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2319
2320        { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2321        { 0x0345, UCHAR_LOWERCASE, TRUE },
2322        { 0x0030, UCHAR_LOWERCASE, FALSE },
2323
2324        { 0x1d7a9, UCHAR_MATH, TRUE },
2325        { 0x2135, UCHAR_MATH, TRUE },
2326        { 0x0062, UCHAR_MATH, FALSE },
2327
2328        { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2329        { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2330        { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2331
2332        { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2333        { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2334        { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2335
2336        { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2337        { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2338
2339        { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2340        { 0x2162, UCHAR_UPPERCASE, TRUE },
2341        { 0x0345, UCHAR_UPPERCASE, FALSE },
2342
2343        { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2344        { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2345        { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2346
2347        { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2348        { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2349        { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2350
2351        { 0x16ee, UCHAR_XID_START, TRUE },
2352        { 0x23456, UCHAR_XID_START, TRUE },
2353        { 0x1d1aa, UCHAR_XID_START, FALSE },
2354
2355        /*
2356         * Version break:
2357         * The following properties are only supported starting with the
2358         * Unicode version indicated in the second field.
2359         */
2360        { -1, 0x320, 0 },
2361
2362        { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2363        { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2364        { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2365
2366        { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2367        { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2368        { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2369        { 0xe0100, UCHAR_DEPRECATED, FALSE },
2370
2371        { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2372        { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2373        { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2374        { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2375
2376        { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2377        { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2378        { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2379        { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2380
2381        { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2382        { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2383
2384        { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2385        { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2386
2387        { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2388        { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2389
2390        { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2391        { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2392
2393        { 0x2e9b, UCHAR_RADICAL, TRUE },
2394        { 0x4e00, UCHAR_RADICAL, FALSE },
2395
2396        { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2397        { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2398
2399        { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2400        { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2401
2402        { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2403
2404        { 0x002e, UCHAR_S_TERM, TRUE },
2405        { 0x0061, UCHAR_S_TERM, FALSE },
2406
2407        { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2408        { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2409        { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2410        { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2411
2412        /* enum/integer type properties */
2413
2414        /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2415        /* test default Bidi classes for unassigned code points */
2416        { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2417        { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2418        { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2419        { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2420        { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2421        { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2422        { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2423        { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2424        { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2425        { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2426        { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2427
2428        { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2429        { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2430        { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2431        { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2432        { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2433        { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2434        { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2435        { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2436
2437        { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2438        { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2439        { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2440        { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2441        { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2442        { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2443        { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2444        { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2445        { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2446        { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2447        { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2448
2449        /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2450        { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2451
2452        { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2453        { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2454        { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2455        { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2456        { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2457        { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2458        { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2459        { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2460        { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2461
2462        { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2463        { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2464        { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2465        { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2466        { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2467        { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2468        { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2469        { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2470        { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2471        { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2472        { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2473        { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2474        { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2475        { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2476        { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2477        { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2478        { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2479
2480        /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2481        { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2482        { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2483
2484        { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2485        { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2486        { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2487        { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2488        { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2489
2490        { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2491        { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2492        { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2493        { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2494        { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2495        { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2496        { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2497        { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2498
2499        /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2500        { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2501        { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2502        { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2503        { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2504        { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2505        { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2506        { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2507        { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2508        { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2509        { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2510        { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2511        { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2512        { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2513        { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2514        { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2515
2516        /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2517
2518        /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2519
2520        { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2521        { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2522        { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2523        { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2524        { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2525        { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2526        { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2527
2528        { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2529        { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2530        { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2531        { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2532
2533        { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2534        { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2535        { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2536        { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2537        { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2538        { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2539
2540        { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2541        { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2542        { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2543        { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2544
2545        { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2546        { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2547        { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2548        { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2549        { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2550        { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2551        { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2552
2553        { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2554        { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2555        { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2556        { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2557
2558        { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2559        { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2560        { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2561        { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2562
2563        { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2564        { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2565        { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2566        { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2567        { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2568
2569        { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2570
2571        { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2572
2573        { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2574        { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2575        { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2576
2577        { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2578        { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2579        { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2580        { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2581        { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2582
2583        { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2584        { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2585        { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2586
2587        { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2588        { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2589        { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2590        { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2591
2592        { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2593        { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2594        { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2595        { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2596        { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2597        { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2598
2599        { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2600        { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2601        { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2602        { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2603
2604        { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2605        { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2606        { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2607        { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2608
2609        { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2610        { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2611        { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2612        { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2613
2614        { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2615
2616        /* unassigned code points in new default Bidi R blocks */
2617        { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2618        { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2619
2620        /* test some script codes >127 */
2621        { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2622        { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2623        { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2624
2625        { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2626
2627        /* value changed in Unicode 6.0 */
2628        { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2629
2630        { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2631
2632        /* unassigned code points in new/changed default Bidi AL blocks */
2633        { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2634        { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2635
2636        { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2637
2638        /* unassigned code points in the currency symbols block now default to ET */
2639        { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2640        { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2641
2642        /* new property in Unicode 6.3 */
2643        { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2644        { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2645        { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2646        { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2647        { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2648        { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2649
2650        /* undefined UProperty values */
2651        { 0x61, 0x4a7, 0 },
2652        { 0x234bc, 0x15ed, 0 }
2653    };
2654
2655    UVersionInfo version;
2656    UChar32 c;
2657    int32_t i, result, uVersion;
2658    UProperty which;
2659
2660    /* what is our Unicode version? */
2661    u_getUnicodeVersion(version);
2662    uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2663
2664    u_charAge(0x20, version);
2665    if(version[0]==0) {
2666        /* no additional properties available */
2667        log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2668        return;
2669    }
2670
2671    /* test u_charAge() */
2672    for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2673        u_charAge(charAges[i].c, version);
2674        if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2675            log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2676                charAges[i].c,
2677                version[0], version[1], version[2], version[3],
2678                charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2679        }
2680    }
2681
2682    if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2683        u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2684        u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2685        u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2686        u_getIntPropertyMinValue(0x2345)!=0
2687    ) {
2688        log_err("error: u_getIntPropertyMinValue() wrong\n");
2689    }
2690    if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2691        log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2692    }
2693    if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2694        log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2695    }
2696    if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2697        log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2698    }
2699    if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2700        log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2701    }
2702    if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2703        log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2704    }
2705    if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2706        log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2707    }
2708    if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2709        log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2710    }
2711    if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2712        log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2713    }
2714    if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2715        log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2716    }
2717    if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2718        log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2719    }
2720    if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2721        log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2722    }
2723    if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2724        log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2725    }
2726    if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2727        log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2728    }
2729    if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2730        log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2731    }
2732    /*JB#2410*/
2733    if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2734        log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2735    }
2736    if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2737        log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2738    }
2739    if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2740        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2741    }
2742    if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2743        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2744    }
2745    if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2746        log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2747    }
2748
2749    /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2750    for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2751        const char *whichName;
2752
2753        if(props[i][0]<0) {
2754            /* Unicode version break */
2755            if(uVersion<props[i][1]) {
2756                break; /* do not test properties that are not yet supported */
2757            } else {
2758                continue; /* skip this row */
2759            }
2760        }
2761
2762        c=(UChar32)props[i][0];
2763        which=(UProperty)props[i][1];
2764        whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2765
2766        if(which<UCHAR_INT_START) {
2767            result=u_hasBinaryProperty(c, which);
2768            if(result!=props[i][2]) {
2769                log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2770                        c, whichName, result, i);
2771            }
2772        }
2773
2774        result=u_getIntPropertyValue(c, which);
2775        if(result!=props[i][2]) {
2776            log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2777                    c, whichName, result, props[i][2], i);
2778        }
2779
2780        /* test separate functions, too */
2781        switch((UProperty)props[i][1]) {
2782        case UCHAR_ALPHABETIC:
2783            if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2784                log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2785                        props[i][0], result, i);
2786            }
2787            break;
2788        case UCHAR_LOWERCASE:
2789            if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2790                log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2791                        props[i][0], result, i);
2792            }
2793            break;
2794        case UCHAR_UPPERCASE:
2795            if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2796                log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2797                        props[i][0], result, i);
2798            }
2799            break;
2800        case UCHAR_WHITE_SPACE:
2801            if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2802                log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2803                        props[i][0], result, i);
2804            }
2805            break;
2806        default:
2807            break;
2808        }
2809    }
2810}
2811
2812static void
2813TestNumericProperties(void) {
2814    /* see UnicodeData.txt, DerivedNumericValues.txt */
2815    static const struct {
2816        UChar32 c;
2817        int32_t type;
2818        double numValue;
2819    } values[]={
2820        { 0x0F33, U_NT_NUMERIC, -1./2. },
2821        { 0x0C66, U_NT_DECIMAL, 0 },
2822        { 0x96f6, U_NT_NUMERIC, 0 },
2823        { 0xa833, U_NT_NUMERIC, 1./16. },
2824        { 0x2152, U_NT_NUMERIC, 1./10. },
2825        { 0x2151, U_NT_NUMERIC, 1./9. },
2826        { 0x1245f, U_NT_NUMERIC, 1./8. },
2827        { 0x2150, U_NT_NUMERIC, 1./7. },
2828        { 0x2159, U_NT_NUMERIC, 1./6. },
2829        { 0x09f6, U_NT_NUMERIC, 3./16. },
2830        { 0x2155, U_NT_NUMERIC, 1./5. },
2831        { 0x00BD, U_NT_NUMERIC, 1./2. },
2832        { 0x0031, U_NT_DECIMAL, 1. },
2833        { 0x4e00, U_NT_NUMERIC, 1. },
2834        { 0x58f1, U_NT_NUMERIC, 1. },
2835        { 0x10320, U_NT_NUMERIC, 1. },
2836        { 0x0F2B, U_NT_NUMERIC, 3./2. },
2837        { 0x00B2, U_NT_DIGIT, 2. },
2838        { 0x5f10, U_NT_NUMERIC, 2. },
2839        { 0x1813, U_NT_DECIMAL, 3. },
2840        { 0x5f0e, U_NT_NUMERIC, 3. },
2841        { 0x2173, U_NT_NUMERIC, 4. },
2842        { 0x8086, U_NT_NUMERIC, 4. },
2843        { 0x278E, U_NT_DIGIT, 5. },
2844        { 0x1D7F2, U_NT_DECIMAL, 6. },
2845        { 0x247A, U_NT_DIGIT, 7. },
2846        { 0x7396, U_NT_NUMERIC, 9. },
2847        { 0x1372, U_NT_NUMERIC, 10. },
2848        { 0x216B, U_NT_NUMERIC, 12. },
2849        { 0x16EE, U_NT_NUMERIC, 17. },
2850        { 0x249A, U_NT_NUMERIC, 19. },
2851        { 0x303A, U_NT_NUMERIC, 30. },
2852        { 0x5345, U_NT_NUMERIC, 30. },
2853        { 0x32B2, U_NT_NUMERIC, 37. },
2854        { 0x1375, U_NT_NUMERIC, 40. },
2855        { 0x10323, U_NT_NUMERIC, 50. },
2856        { 0x0BF1, U_NT_NUMERIC, 100. },
2857        { 0x964c, U_NT_NUMERIC, 100. },
2858        { 0x217E, U_NT_NUMERIC, 500. },
2859        { 0x2180, U_NT_NUMERIC, 1000. },
2860        { 0x4edf, U_NT_NUMERIC, 1000. },
2861        { 0x2181, U_NT_NUMERIC, 5000. },
2862        { 0x137C, U_NT_NUMERIC, 10000. },
2863        { 0x4e07, U_NT_NUMERIC, 10000. },
2864        { 0x12432, U_NT_NUMERIC, 216000. },
2865        { 0x12433, U_NT_NUMERIC, 432000. },
2866        { 0x4ebf, U_NT_NUMERIC, 100000000. },
2867        { 0x5146, U_NT_NUMERIC, 1000000000000. },
2868        { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2869        { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2870        { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2871        { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2872        { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2873        { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2874        { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2875        { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2876    };
2877
2878    double nv;
2879    UChar32 c;
2880    int32_t i, type;
2881
2882    for(i=0; i<LENGTHOF(values); ++i) {
2883        c=values[i].c;
2884        type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2885        nv=u_getNumericValue(c);
2886
2887        if(type!=values[i].type) {
2888            log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2889        }
2890        if(0.000001 <= fabs(nv - values[i].numValue)) {
2891            log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2892        }
2893    }
2894}
2895
2896/**
2897 * Test the property names and property value names API.
2898 */
2899static void
2900TestPropertyNames(void) {
2901    int32_t p, v, choice=0, rev;
2902    UBool atLeastSomething = FALSE;
2903
2904    for (p=0; ; ++p) {
2905        UProperty propEnum = (UProperty)p;
2906        UBool sawProp = FALSE;
2907        if(p > 10 && !atLeastSomething) {
2908          log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2909          return;
2910        }
2911
2912        for (choice=0; ; ++choice) {
2913            const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2914            if (name) {
2915                if (!sawProp)
2916                    log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2917                log_verbose("%d=\"%s\"", choice, name);
2918                sawProp = TRUE;
2919                atLeastSomething = TRUE;
2920
2921                /* test reverse mapping */
2922                rev = u_getPropertyEnum(name);
2923                if (rev != p) {
2924                    log_err("Property round-trip failure: %d -> %s -> %d\n",
2925                            p, name, rev);
2926                }
2927            }
2928            if (!name && choice>0) break;
2929        }
2930        if (sawProp) {
2931            /* looks like a valid property; check the values */
2932            const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2933            int32_t max = 0;
2934            if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2935                max = 255;
2936            } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2937                /* it's far too slow to iterate all the way up to
2938                   the real max, U_GC_P_MASK */
2939                max = U_GC_NL_MASK;
2940            } else if (p == UCHAR_BLOCK) {
2941                /* UBlockCodes, unlike other values, start at 1 */
2942                max = 1;
2943            }
2944            log_verbose("\n");
2945            for (v=-1; ; ++v) {
2946                UBool sawValue = FALSE;
2947                for (choice=0; ; ++choice) {
2948                    const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2949                    if (vname) {
2950                        if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2951                        log_verbose("%d=\"%s\"", choice, vname);
2952                        sawValue = TRUE;
2953
2954                        /* test reverse mapping */
2955                        rev = u_getPropertyValueEnum(propEnum, vname);
2956                        if (rev != v) {
2957                            log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2958                                    pname, v, vname, rev);
2959                        }
2960                    }
2961                    if (!vname && choice>0) break;
2962                }
2963                if (sawValue) {
2964                    log_verbose("\n");
2965                }
2966                if (!sawValue && v>=max) break;
2967            }
2968        }
2969        if (!sawProp) {
2970            if (p>=UCHAR_STRING_LIMIT) {
2971                break;
2972            } else if (p>=UCHAR_DOUBLE_LIMIT) {
2973                p = UCHAR_STRING_START - 1;
2974            } else if (p>=UCHAR_MASK_LIMIT) {
2975                p = UCHAR_DOUBLE_START - 1;
2976            } else if (p>=UCHAR_INT_LIMIT) {
2977                p = UCHAR_MASK_START - 1;
2978            } else if (p>=UCHAR_BINARY_LIMIT) {
2979                p = UCHAR_INT_START - 1;
2980            }
2981        }
2982    }
2983}
2984
2985/**
2986 * Test the property values API.  See JB#2410.
2987 */
2988static void
2989TestPropertyValues(void) {
2990    int32_t i, p, min, max;
2991    UErrorCode ec;
2992
2993    /* Min should be 0 for everything. */
2994    /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2995    for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2996        UProperty propEnum = (UProperty)p;
2997        min = u_getIntPropertyMinValue(propEnum);
2998        if (min != 0) {
2999            if (p == UCHAR_BLOCK) {
3000                /* This is okay...for now.  See JB#2487.
3001                   TODO Update this for JB#2487. */
3002            } else {
3003                const char* name;
3004                name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3005                if (name == NULL)
3006                    name = "<ERROR>";
3007                log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3008                        name, min);
3009            }
3010        }
3011    }
3012
3013    if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3014        u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3015        log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3016    }
3017
3018    /* Max should be -1 for invalid properties. */
3019    max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
3020    if (max != -1) {
3021        log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3022                max);
3023    }
3024
3025    /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
3026    for (i=0; i<2; ++i) {
3027        int32_t script;
3028        const char* desc;
3029        ec = U_ZERO_ERROR;
3030        switch (i) {
3031        case 0:
3032            script = uscript_getScript(-1, &ec);
3033            desc = "uscript_getScript(-1)";
3034            break;
3035        case 1:
3036            script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3037            desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3038            break;
3039        default:
3040            log_err("Internal test error. Too many scripts\n");
3041            return;
3042        }
3043        /* We don't explicitly test ec.  It should be U_FAILURE but it
3044           isn't documented as such. */
3045        if (script != (int32_t)USCRIPT_INVALID_CODE) {
3046            log_err("FAIL: %s = %d, exp. 0\n",
3047                    desc, script);
3048        }
3049    }
3050}
3051
3052/* various tests for consistency of UCD data and API behavior */
3053static void
3054TestConsistency() {
3055    char buffer[300];
3056    USet *set1, *set2, *set3, *set4;
3057    UErrorCode errorCode;
3058
3059    UChar32 start, end;
3060    int32_t i, length;
3061
3062    U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3063    U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3064    U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3065    U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3066    U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3067
3068    U_STRING_DECL(mathBlocksPattern,
3069        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3070        214);
3071    U_STRING_DECL(mathPattern, "[:Math:]", 8);
3072    U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3073    U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3074    U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3075
3076    U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3077    U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3078    U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3079    U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3080    U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3081
3082    U_STRING_INIT(mathBlocksPattern,
3083        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3084        214);
3085    U_STRING_INIT(mathPattern, "[:Math:]", 8);
3086    U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3087    U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3088    U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3089
3090    /*
3091     * It used to be that UCD.html and its precursors said
3092     * "Those dashes used to mark connections between pieces of words,
3093     *  plus the Katakana middle dot."
3094     *
3095     * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3096     * but not from Hyphen.
3097     * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3098     * Therefore, do not show errors when testing the Hyphen property.
3099     */
3100    log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3101                "known to the UTC and not considered errors.\n");
3102
3103    errorCode=U_ZERO_ERROR;
3104    set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3105    set2=uset_openPattern(dashPattern, 8, &errorCode);
3106    if(U_SUCCESS(errorCode)) {
3107        /* remove the Katakana middle dot(s) from set1 */
3108        uset_remove(set1, 0x30fb);
3109        uset_remove(set1, 0xff65); /* halfwidth variant */
3110        showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3111    } else {
3112        log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3113    }
3114
3115    /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3116    set3=uset_openPattern(formatPattern, 6, &errorCode);
3117    set4=uset_openPattern(alphaPattern, 14, &errorCode);
3118    if(U_SUCCESS(errorCode)) {
3119        showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3120        showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3121        showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3122    } else {
3123        log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3124    }
3125
3126    uset_close(set1);
3127    uset_close(set2);
3128    uset_close(set3);
3129    uset_close(set4);
3130
3131    /*
3132     * Check that each lowercase character has "small" in its name
3133     * and not "capital".
3134     * There are some such characters, some of which seem odd.
3135     * Use the verbose flag to see these notices.
3136     */
3137    errorCode=U_ZERO_ERROR;
3138    set1=uset_openPattern(lowerPattern, 13, &errorCode);
3139    if(U_SUCCESS(errorCode)) {
3140        for(i=0;; ++i) {
3141            length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3142            if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3143                break; /* done */
3144            }
3145            if(U_FAILURE(errorCode)) {
3146                log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3147                        i, u_errorName(errorCode));
3148                break;
3149            }
3150            if(length!=0) {
3151                break; /* done with code points, got a string or -1 */
3152            }
3153
3154            while(start<=end) {
3155                length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3156                if(U_FAILURE(errorCode)) {
3157                    log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3158                    errorCode=U_ZERO_ERROR;
3159                }
3160                if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3161                    strstr(buffer, "SMALL CAPITAL")==NULL
3162                ) {
3163                    log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3164                }
3165                ++start;
3166            }
3167        }
3168    } else {
3169        log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3170    }
3171    uset_close(set1);
3172
3173    /* verify that all assigned characters in Math blocks are exactly Math characters */
3174    errorCode=U_ZERO_ERROR;
3175    set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3176    set2=uset_openPattern(mathPattern, 8, &errorCode);
3177    set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3178    if(U_SUCCESS(errorCode)) {
3179        uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3180        uset_complement(set3);      /* assigned characters */
3181        uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3182        compareUSets(set1, set2,
3183                     "[assigned Math block chars]", "[math blocks]&[:Math:]",
3184                     TRUE);
3185    } else {
3186        log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3187    }
3188    uset_close(set1);
3189    uset_close(set2);
3190    uset_close(set3);
3191
3192    /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3193    errorCode=U_ZERO_ERROR;
3194    set1=uset_openPattern(unknownPattern, 14, &errorCode);
3195    set2=uset_openPattern(reservedPattern, 20, &errorCode);
3196    if(U_SUCCESS(errorCode)) {
3197        compareUSets(set1, set2,
3198                     "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3199                     TRUE);
3200    } else {
3201        log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3202    }
3203    uset_close(set1);
3204    uset_close(set2);
3205}
3206
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
3217#define HARDCODED_DATA_4497 1
3218
3219/* API coverage for ucase.c */
3220static void TestUCase() {
3221#if !HARDCODED_DATA_4497
3222    UDataMemory *pData;
3223    UCaseProps *csp;
3224    const UCaseProps *ccsp;
3225    UErrorCode errorCode;
3226
3227    /* coverage for ucase_openBinary() */
3228    errorCode=U_ZERO_ERROR;
3229    pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3230    if(U_FAILURE(errorCode)) {
3231        log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3232                    u_errorName(errorCode));
3233        return;
3234    }
3235
3236    csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3237    if(U_FAILURE(errorCode)) {
3238        log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3239                u_errorName(errorCode));
3240        udata_close(pData);
3241        return;
3242    }
3243
3244    if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3245        log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3246    }
3247
3248    ucase_close(csp);
3249    udata_close(pData);
3250
3251    /* coverage for ucase_getDummy() */
3252    errorCode=U_ZERO_ERROR;
3253    ccsp=ucase_getDummy(&errorCode);
3254    if(ucase_tolower(ccsp, 0x41)!=0x41) {
3255        log_err("ucase_tolower(dummy, A)!=A\n");
3256    }
3257#endif
3258}
3259
3260/* API coverage for ubidi_props.c */
3261static void TestUBiDiProps() {
3262#if !HARDCODED_DATA_4497
3263    UDataMemory *pData;
3264    UBiDiProps *bdp;
3265    const UBiDiProps *cbdp;
3266    UErrorCode errorCode;
3267
3268    /* coverage for ubidi_openBinary() */
3269    errorCode=U_ZERO_ERROR;
3270    pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3271    if(U_FAILURE(errorCode)) {
3272        log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3273                    u_errorName(errorCode));
3274        return;
3275    }
3276
3277    bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3278    if(U_FAILURE(errorCode)) {
3279        log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3280                u_errorName(errorCode));
3281        udata_close(pData);
3282        return;
3283    }
3284
3285    if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3286        log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3287    }
3288
3289    ubidi_closeProps(bdp);
3290    udata_close(pData);
3291
3292    /* coverage for ubidi_getDummy() */
3293    errorCode=U_ZERO_ERROR;
3294    cbdp=ubidi_getDummy(&errorCode);
3295    if(ubidi_getClass(cbdp, 0x20)!=0) {
3296        log_err("ubidi_getClass(dummy, space)!=0\n");
3297    }
3298#endif
3299}
3300
3301/* test case folding, compare return values with CaseFolding.txt ------------ */
3302
/*
 * Bit flags recording which case foldings of a character have already
 * been tested; CF_ALL is the union of the three individual flags.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3310
3311static void
3312testFold(UChar32 c, int which,
3313         UChar32 simple, UChar32 turkic,
3314         const UChar *full, int32_t fullLength,
3315         const UChar *turkicFull, int32_t turkicFullLength) {
3316    UChar s[2], t[32];
3317    UChar32 c2;
3318    int32_t length, length2;
3319
3320    UErrorCode errorCode=U_ZERO_ERROR;
3321
3322    length=0;
3323    U16_APPEND_UNSAFE(s, length, c);
3324
3325    if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3326        log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3327    }
3328    if((which&CF_FULL)!=0) {
3329        length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3330        if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3331            log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3332        }
3333    }
3334    if((which&CF_TURKIC)!=0) {
3335        if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3336            log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3337        }
3338
3339        length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3340        if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3341            log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3342        }
3343    }
3344}
3345
3346/* test that c case-folds to itself */
3347static void
3348testFoldToSelf(UChar32 c, int which) {
3349    UChar s[2];
3350    int32_t length;
3351
3352    length=0;
3353    U16_APPEND_UNSAFE(s, length, c);
3354    testFold(c, which, c, c, s, length, s, length);
3355}
3356
3357struct CaseFoldingData {
3358    USet *notSeen;
3359    UChar32 prev, prevSimple;
3360    UChar prevFull[32];
3361    int32_t prevFullLength;
3362    int which;
3363};
3364typedef struct CaseFoldingData CaseFoldingData;
3365
3366static void U_CALLCONV
3367caseFoldingLineFn(void *context,
3368                  char *fields[][2], int32_t fieldCount,
3369                  UErrorCode *pErrorCode) {
3370    CaseFoldingData *pData=(CaseFoldingData *)context;
3371    char *end;
3372    UChar full[32];
3373    UChar32 c, prev, simple;
3374    int32_t count;
3375    int which;
3376    char status;
3377
3378    /* get code point */
3379    const char *s=u_skipWhitespace(fields[0][0]);
3380    if(0==strncmp(s, "0000..10FFFF", 12)) {
3381        /*
3382         * Ignore the line
3383         * # @missing: 0000..10FFFF; C; <code point>
3384         * because maps-to-self is already our default, and this line breaks this parser.
3385         */
3386        return;
3387    }
3388    c=(UChar32)strtoul(s, &end, 16);
3389    end=(char *)u_skipWhitespace(end);
3390    if(end<=fields[0][0] || end!=fields[0][1]) {
3391        log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3392        *pErrorCode=U_PARSE_ERROR;
3393        return;
3394    }
3395
3396    /* get the status of this mapping */
3397    status=*u_skipWhitespace(fields[1][0]);
3398    if(status!='C' && status!='S' && status!='F' && status!='T') {
3399        log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3400        *pErrorCode=U_PARSE_ERROR;
3401        return;
3402    }
3403
3404    /* get the mapping */
3405    count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3406    if(U_FAILURE(*pErrorCode)) {
3407        log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3408        return;
3409    }
3410
3411    /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3412    if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3413        simple=c;
3414    }
3415
3416    if(c!=(prev=pData->prev)) {
3417        /*
3418         * Test remaining mappings for the previous code point.
3419         * If a turkic folding was not mentioned, then it should fold the same
3420         * as the regular simple case folding.
3421         */
3422        UChar prevString[2];
3423        int32_t length;
3424
3425        length=0;
3426        U16_APPEND_UNSAFE(prevString, length, prev);
3427        testFold(prev, (~pData->which)&CF_ALL,
3428                 prev, pData->prevSimple,
3429                 prevString, length,
3430                 pData->prevFull, pData->prevFullLength);
3431        pData->prev=pData->prevSimple=c;
3432        length=0;
3433        U16_APPEND_UNSAFE(pData->prevFull, length, c);
3434        pData->prevFullLength=length;
3435        pData->which=0;
3436    }
3437
3438    /*
3439     * Turn the status into a bit set of case foldings to test.
3440     * Remember non-Turkic case foldings as defaults for Turkic mode.
3441     */
3442    switch(status) {
3443    case 'C':
3444        which=CF_SIMPLE|CF_FULL;
3445        pData->prevSimple=simple;
3446        u_memcpy(pData->prevFull, full, count);
3447        pData->prevFullLength=count;
3448        break;
3449    case 'S':
3450        which=CF_SIMPLE;
3451        pData->prevSimple=simple;
3452        break;
3453    case 'F':
3454        which=CF_FULL;
3455        u_memcpy(pData->prevFull, full, count);
3456        pData->prevFullLength=count;
3457        break;
3458    case 'T':
3459        which=CF_TURKIC;
3460        break;
3461    default:
3462        which=0;
3463        break; /* won't happen because of test above */
3464    }
3465
3466    testFold(c, which, simple, simple, full, count, full, count);
3467
3468    /* remember which case foldings of c have been tested */
3469    pData->which|=which;
3470
3471    /* remove c from the set of ones not mentioned in CaseFolding.txt */
3472    uset_remove(pData->notSeen, c);
3473}
3474
3475static void
3476TestCaseFolding() {
3477    CaseFoldingData data={ NULL };
3478    char *fields[3][2];
3479    UErrorCode errorCode;
3480
3481    static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3482
3483    errorCode=U_ZERO_ERROR;
3484    /* test BMP & plane 1 - nothing interesting above */
3485    data.notSeen=uset_open(0, 0x1ffff);
3486    data.prevFullLength=1; /* length of full case folding of U+0000 */
3487
3488    parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3489    if(U_SUCCESS(errorCode)) {
3490        int32_t i, start, end;
3491
3492        /* add a pseudo-last line to finish testing of the actual last one */
3493        fields[0][0]=lastLine;
3494        fields[0][1]=lastLine+6;
3495        fields[1][0]=lastLine+7;
3496        fields[1][1]=lastLine+9;
3497        fields[2][0]=lastLine+10;
3498        fields[2][1]=lastLine+17;
3499        caseFoldingLineFn(&data, fields, 3, &errorCode);
3500
3501        /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3502        for(i=0;
3503            0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3504                U_SUCCESS(errorCode);
3505            ++i
3506        ) {
3507            do {
3508                testFoldToSelf(start, CF_ALL);
3509            } while(++start<=end);
3510        }
3511    }
3512
3513    uset_close(data.notSeen);
3514}
3515